Adel Abu Hashim & Mahmoud Nagy - August 2021
This case study aims to help Amber Heard
by analyzing newly created accounts posting and commenting against the victim of a social-bot disinformation/influence operation.
We have four main datasets:
(The datasets were scraped from Reddit.)
- 1- A dataset with submissions & comments data (2021).
- 2- Users Data (from 2006 to 2021).
- 3- A merged dataset (submissions & comments data, users data).
- 4- Daily creation data (# of accounts created per day from 2006 to 2021)
# Import dependencies.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import helpers  # project-local helper module
import matplotlib.dates as mdates
import plotly.express as px
import plotly.graph_objects as go
import re
import warnings
# Silence library warnings so the notebook output stays readable.
warnings.filterwarnings('ignore')
# Apply seaborn's darkgrid theme to all matplotlib figures.
sb.set_style("darkgrid")
%matplotlib inline
# Load the three cleaned datasets produced by the cleaning notebook.
df = pd.read_csv("cleaned_data/reddit_cleaned_2021.csv")
df_merged = pd.read_csv("cleaned_data/reddit_merged_2021.csv")
df_users = pd.read_csv("cleaned_data/users_cleaned.csv")
# Parse the timestamp columns into proper datetime64 dtypes.
df['created_at'] = pd.to_datetime(df['created_at'])
for _ts_col in ('created_at', 'user_created_at'):
    df_merged[_ts_col] = pd.to_datetime(df_merged[_ts_col])
df_users['user_created_at'] = pd.to_datetime(df_users['user_created_at'])
Reddit Contributions (Comments / Submissions)¶
# Bar chart: how many rows are comments vs. submissions.
contribution_counts = df['submission_comment'].value_counts().to_frame().reset_index()
px.bar(
    data_frame=contribution_counts,
    x="index",
    y="submission_comment",
    color='submission_comment',
).update_layout(
    title='Comment or Submission',
    xaxis_title='contribution category',
    yaxis_title='number of contributions',
).update_traces(marker_color='#5296dd')
# Table of the 10 most active authors in 2021.
df['author'].value_counts().head(10).to_frame().reset_index()
| index | author | |
|---|---|---|
| 0 | -banned- | 1587 |
| 1 | AutoModerator | 515 |
| 2 | CelebBattleVoteBot | 163 |
| 3 | LoveAmberHeard42286 | 124 |
| 4 | charliedba | 99 |
| 5 | Stanley_Elkind | 44 |
| 6 | Truthbetheprejudice | 43 |
| 7 | gaul66 | 37 |
| 8 | Beatplayer | 32 |
| 9 | sadwook | 32 |
# Horizontal-style bar chart of the 10 most active authors in 2021.
# After value_counts().reset_index(), user names land in the 'index' column
# and contribution counts in the 'author' column, so x is the count and
# y is the user name.
top_authors = df.author.value_counts().to_frame().head(10).reset_index()
fig = px.bar(top_authors, x="author", y="index", height=500,
             title='Most commented user in 2021')
fig.update_layout(xaxis_title='number of comments',
                  yaxis_title='user name')
# FIX: the original chained update_traces(marker_color='#5296dd') twice;
# a single call sets the color.
fig.update_traces(marker_color='#5296dd')
# Put the most active author at the top.
fig.update_yaxes(autorange="reversed")
# Re-display the top-10 author table for reference beneath the chart.
df['author'].value_counts().head(10).to_frame().reset_index()
| index | author | |
|---|---|---|
| 0 | -banned- | 1587 |
| 1 | AutoModerator | 515 |
| 2 | CelebBattleVoteBot | 163 |
| 3 | LoveAmberHeard42286 | 124 |
| 4 | charliedba | 99 |
| 5 | Stanley_Elkind | 44 |
| 6 | Truthbetheprejudice | 43 |
| 7 | gaul66 | 37 |
| 8 | Beatplayer | 32 |
| 9 | sadwook | 32 |
AutoModerator is a system built into reddit that allows moderators to define "rules" (consisting of checks and actions) to be automatically applied to posts in their subreddit.
# Isolate AutoModerator's contributions into their own frame.
df_auto_moderator = df[df['author'] == 'AutoModerator'].reset_index(drop=True)
print(df_auto_moderator.shape)
df_auto_moderator.head(1)
(515, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | t1_gho6m2p | /r/AskReddit/comments/ko448x/i_strongly_enjoy_... | **PLEASE READ THIS MESSAGE IN ITS ENTIRETY BEF... | t3_ko448x | r/AskReddit | AutoModerator | 2021-01-01 03:25:15 | Positive | Positive | 1.0 | submission | comment | 155 | i_strongly_enjoy_amber_heard_as_a_person_and | 9 | ['http://www.reddit.com'] | 1 |
# Which subreddits AutoModerator posted in most often.
df_auto_moderator['subreddit'].value_counts().head(10)
r/JerkOffToCelebs 176 r/memes 55 r/Celebhub 48 r/unpopularopinion 32 r/AskReddit 30 r/DC_Cinematic 26 r/CelebBattleLeague 22 r/OutOfTheLoop 18 r/iamatotalpieceofshit 17 r/jerkbudss 6 Name: subreddit, dtype: int64
# Spot-check one permalink from the AutoModerator subset.
df_auto_moderator.permalink.iloc[26]
'/r/darkjokes/comments/kq9r9d/if_relationship_of_amber_heard_and_johnny_depp/gi2itdv/'
# Most repeated AutoModerator messages (output suppressed with ';').
df_auto_moderator['text'].value_counts().head();
This is a vote bot
# Isolate CelebBattleVoteBot's contributions.
df_vote_bot = df[df['author'] == 'CelebBattleVoteBot'].reset_index(drop=True)
print(df_vote_bot.shape)
df_vote_bot.head()
(163, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | t3_ko1ew0 | /r/CelebbattlePolls/comments/ko1ew0/marvel_vs_... | Marvel vs DC : Team Marvel (Evangeline Lilly B... | NaN | r/CelebbattlePolls | CelebBattleVoteBot | 2021-01-01 00:23:29 | Neutral | Positive | 1.0 | NaN | submission | 25 | marvel_vs_dc_team_marvel_evangeline_lilly_brie | 8 | [] | 0 |
| 1 | t1_ghnp3w4 | /r/CelebbattlePolls/comments/ko1ew0/marvel_vs_... | Poll for [Marvel vs DC : Team Marvel (Evangeli... | t3_ko1ew0 | r/CelebbattlePolls | CelebBattleVoteBot | 2021-01-01 00:23:30 | Neutral | Positive | 1.0 | submission | comment | 29 | marvel_vs_dc_team_marvel_evangeline_lilly_brie | 8 | ['https://reddit.com'] | 1 |
| 2 | t1_ghnp3wq | /r/CelebBattles/comments/ko1duk/marvel_vs_dc_t... | Vote here: https://www.reddit.com/poll/ko1ew0\... | t3_ko1duk | r/CelebBattles | CelebBattleVoteBot | 2021-01-01 00:23:30 | Positive | Neutral | 1.0 | submission | comment | 12 | marvel_vs_dc_team_marvel_evangeline_lilly_brie | 8 | ['https://www.reddit.com'] | 1 |
| 3 | t3_kp2soe | /r/CelebbattlePolls/comments/kp2soe/rachel_mca... | Rachel McAdams vs Amber Heard | NaN | r/CelebbattlePolls | CelebBattleVoteBot | 2021-01-02 18:35:07 | Neutral | Neutral | 2.0 | NaN | submission | 5 | rachel_mcadams_vs_amber_heard | 5 | [] | 0 |
| 4 | t1_ghul208 | /r/CelebbattlePolls/comments/kp2soe/rachel_mca... | Poll for [Rachel McAdams vs Amber Heard](https... | t3_kp2soe | r/CelebbattlePolls | CelebBattleVoteBot | 2021-01-02 18:35:07 | Neutral | Neutral | 1.0 | submission | comment | 9 | rachel_mcadams_vs_amber_heard | 5 | ['https://reddit.com'] | 1 |
# Spot-check the first permalink from the vote-bot subset.
df_vote_bot.permalink.iloc[0]
'/r/CelebbattlePolls/comments/ko1ew0/marvel_vs_dc_team_marvel_evangeline_lilly_brie/'
# Which subreddits the vote bot operated in.
df_vote_bot['subreddit'].value_counts().head(10)
r/CelebbattlePolls 83 r/CelebBattles 79 r/JerkOffToCelebs 1 Name: subreddit, dtype: int64
# Most frequent vote-bot messages.
df_vote_bot['text'].value_counts().head()
Amanda Seyfried VS. Amber Heard 2 Anne Hathaway vs Amber Heard 2 Poll for [Amber Heard vs Torrie Wilson](https://reddit.com/lp7af8) on CelebBattles 1 Poll for [Katy Perry vs Evangeline Lilly vs Amber Heard](https://reddit.com/l3eo1l) on CelebBattles 1 Poll for [Zoe Saldana vs Amber Heard](https://reddit.com/n958ue) on CelebBattles 1 Name: text, dtype: int64
Positive Submissions
# Isolate charliedba's contributions.
df_charliedba = df[df['author'] == 'charliedba'].reset_index(drop=True)
print(df_charliedba.shape)
df_charliedba.head()
(99, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | t3_ms8xkq | /r/Amber_Heard/comments/ms8xkq/long_legged_bea... | Long legged beauty [Amber Heard] | NaN | r/Amber_Heard | charliedba | 2021-04-16 18:07:26 | Negative | Positive | 1.0 | NaN | submission | 5 | Amber_Heard | 2 | [] | 0 |
| 1 | t3_msbo0x | /r/BeautifulFemales/comments/msbo0x/amber_hear... | Amber Heard [irtr] | NaN | r/BeautifulFemales | charliedba | 2021-04-16 20:23:46 | Neutral | Neutral | 29.0 | NaN | submission | 3 | amber_heard_irtr | 3 | [] | 0 |
| 2 | t3_mtkdvb | /r/Amber_Heard/comments/mtkdvb/angelic_smile_a... | Angelic smile [Amber Heard] | NaN | r/Amber_Heard | charliedba | 2021-04-18 20:08:57 | Positive | Positive | 1.0 | NaN | submission | 4 | Amber_Heard | 2 | [] | 0 |
| 3 | t3_mtkfg1 | /r/UHQcelebs/comments/mtkfg1/amber_heard_2100_... | Amber Heard [2100 x 3150] | NaN | r/UHQcelebs | charliedba | 2021-04-18 20:11:13 | Neutral | Neutral | 55.0 | NaN | submission | 5 | amber_heard_2100_x_3150 | 5 | [] | 0 |
| 4 | t3_mtkgku | /r/FamousFaces/comments/mtkgku/amber_heard_210... | Amber Heard [2100 x 3150] | NaN | r/FamousFaces | charliedba | 2021-04-18 20:12:54 | Neutral | Neutral | 5.0 | NaN | submission | 5 | amber_heard_2100_x_3150 | 5 | [] | 0 |
# Spot-check the first permalink from the charliedba subset.
df_charliedba.permalink.iloc[0]
'/r/Amber_Heard/comments/ms8xkq/long_legged_beauty_amber_heard/'
# Which subreddits charliedba posted in most often.
df_charliedba['subreddit'].value_counts().head(10)
r/Amber_Heard_2 80 r/Amber_Heard 7 r/HighResCelebs 3 r/UHQcelebs 3 r/BeautifulFemales 2 r/Celebhub 1 r/PrettyWomen 1 r/Celebs 1 r/FamousFaces 1 Name: subreddit, dtype: int64
# Most repeated charliedba post titles/texts.
df_charliedba['text'].value_counts().head(10)
Amber Heard 31 Stunning Amber Heard 4 Cute Amber Heard 3 Amber Heard [2100 x 3150] 3 Gorgeous Amber Heard 3 Angelic Amber Heard 3 Amber Heard [irtr] 2 Young Amber Heard 2 Amber Heard [2218 x 3000] 2 Amber Heard collage 2 Name: text, dtype: int64
This account posts negative, sexually explicit comments.
# Look up when this account was created.
# (Equivalent single-value lookup:
#  df_users.set_index('user_name').at['Stanley_Elkind', 'user_created_at'])
df_users[df_users['user_name'] == 'Stanley_Elkind']
| user_name | has_verified_email | is_mod | is_gold | is_banned | comment_karma | link_karma | user_created_at | banned_unverified | creation_year | |
|---|---|---|---|---|---|---|---|---|---|---|
| 65169 | Stanley_Elkind | True | False | False | False | 22104.0 | 30001.0 | 2021-01-25 02:04:24 | others | 2021 |
# Isolate Stanley_Elkind's contributions.
df_stanley = df[df['author'] == 'Stanley_Elkind'].reset_index(drop=True)
print(df_stanley.shape)
df_stanley.head()
(44, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | t1_gmzc8ly | /r/JerkOffToCelebs/comments/lhu3nv/amber_heard... | I would put it in every one of her holes | t3_lhu3nv | r/JerkOffToCelebs | Stanley_Elkind | 2021-02-11 20:42:48 | Neutral | Neutral | 3.0 | submission | comment | 10 | amber_heard | 2 | [] | 0 |
| 1 | t1_gnf1pv3 | /r/DC_Cinematic/comments/ljqbut/discussion_hon... | Everyone is cool burgeoning cult leader Jared ... | t3_ljqbut | r/DC_Cinematic | Stanley_Elkind | 2021-02-14 15:28:05 | Positive | Positive | 1.0 | submission | comment | 9 | DC_Cinematic | 2 | [] | 0 |
| 2 | t1_gni2nrr | /r/JerkOffToCelebs/comments/lk5bc8/cant_help_b... | Black widow | t3_lk5bc8 | r/JerkOffToCelebs | Stanley_Elkind | 2021-02-15 04:21:15 | Negative | Neutral | 1.0 | submission | comment | 2 | cant_help_but_notice_the_way_she_looks_at_you_now | 11 | [] | 0 |
| 3 | t1_gnuualx | /r/JerkOffToCelebs/comments/lmc28b/amber_heard... | The more people hate her the more I want to fu... | t3_lmc28b | r/JerkOffToCelebs | Stanley_Elkind | 2021-02-18 05:36:20 | Negative | Neutral | 3.0 | submission | comment | 12 | amber_heard_could_do_with_a_skull_fucking | 8 | [] | 0 |
| 4 | t1_go1jx5r | /r/JerkOffToCelebs/comments/lnodlf/amber_heard... | I can’t be quit her. I don’t want to. | t3_lnodlf | r/JerkOffToCelebs | Stanley_Elkind | 2021-02-19 20:01:40 | Neutral | Positive | 1.0 | submission | comment | 9 | amber_heard_is_the_sexiest_psycho_i_ever_saw | 9 | [] | 0 |
# Which subreddits Stanley_Elkind posted in most often.
df_stanley['subreddit'].value_counts().head(10)
r/JerkOffToCelebs 36 r/pickoneceleb 3 r/CelebAssPussyMouth 2 r/CelebWouldYouRather 2 r/DC_Cinematic 1 Name: subreddit, dtype: int64
# Most repeated Stanley_Elkind comments.
df_stanley['text'].value_counts().head(10)
When it comes to Amber Heard I let my penis make the decisions. 1 Honestly, the more people tell she’s evil the more I want to cum for her 1 Black widow 1 I’d like Amber all to myself. I’d like to try double anal with Margot. 1 Squeeze Emily’s fat ass while she’s riding me\n\nStuff my genitals inside Bella’s face \n\nFully clothed handjob with Florence. Her sultry eyes are more than enough for me. \n\nOily titjob from Lizzie Olsen and cum inside her pretty mouth\n\nRough doggy style anal with Amber Heard for obvious reasons\n\nMake out with Zendaya until it leaves a stain in my pants 1 [Morgan Freeman voice]\n\nI agree with the second part. 1 I don’t actually care what she did. The hate lobbed at her is just different type of celebrity worship which Reddit supposedly hates. I like to cum to pics of her face. 1 I gotta admit... i would lick her shitter 1 Everyone is cool burgeoning cult leader Jared Leto though. 1 Breed repeatedly on my own: Gal\n\nBreed once: Melissa\n\nGang bang Margot\n\nAnd this was obviously just an excuse to hear other people say theyd watch Amber Heard get fucked by a horse 1 Name: text, dtype: int64
# Count Stanley_Elkind's contributions per calendar day.
by_day = df_stanley.created_at.dt.date
df_stanley_contributions = (
    df_stanley.groupby(by_day).size().reset_index(name='n_contributions')
)
fig = px.bar(
    df_stanley_contributions,
    x='created_at',
    y='n_contributions',
    title='The number of "Stanley_Elkind" contributions in 2021',
)
# Force one tick per contribution date so sparse days remain visible.
fig.update_layout(
    xaxis=dict(
        title='Contribution Date',
        tickmode='array',
        tickvals=df_stanley_contributions.created_at,
    )
)
fig.update_traces(marker_color='red', marker_line_width=2, opacity=1,
                  textposition='auto')
fig.show()
Negative Submissions
# Look up when this account was created.
df_users[df_users['user_name'] == 'Truthbetheprejudice']
| user_name | has_verified_email | is_mod | is_gold | is_banned | comment_karma | link_karma | user_created_at | banned_unverified | creation_year | |
|---|---|---|---|---|---|---|---|---|---|---|
| 70571 | Truthbetheprejudice | True | True | True | True | NaN | NaN | NaT | banned | banned |
# Isolate Truthbetheprejudice's contributions.
df_truth = df[df['author'] == 'Truthbetheprejudice'].reset_index(drop=True)
print(df_truth.shape)
df_truth.head()
(43, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | t3_mtioam | /r/MensRights/comments/mtioam/johnny_depp_seen... | Johnny Depp Seen In Rare Photos In Spain, As H... | NaN | r/MensRights | Truthbetheprejudice | 2021-04-18 18:41:42 | Positive | Neutral | 1.0 | NaN | submission | 17 | johnny_depp_seen_in_rare_photos_in_spain_as_his | 10 | [] | 0 |
| 1 | t3_mtjtsr | /r/movies/comments/mtjtsr/johnny_depp_makes_an... | Johnny Depp Makes an Appearance in Spain as La... | NaN | r/movies | Truthbetheprejudice | 2021-04-18 19:40:25 | Positive | Negative | 1.0 | NaN | submission | 16 | johnny_depp_makes_an_appearance_in_spain_as | 8 | [] | 0 |
| 2 | t3_mtkogv | /r/entertainment/comments/mtkogv/johnny_depp_m... | Johnny Depp Makes an Appearance in Spain as La... | NaN | r/entertainment | Truthbetheprejudice | 2021-04-18 20:24:14 | Positive | Negative | 1.0 | NaN | submission | 16 | johnny_depp_makes_an_appearance_in_spain_as | 8 | [] | 0 |
| 3 | t3_muyp3k | /r/MensRights/comments/muyp3k/justiceforjohnny... | #JusticeForJohnnyDepp Johnny Depp fans donate ... | NaN | r/MensRights | Truthbetheprejudice | 2021-04-20 19:47:45 | Neutral | Negative | 75.0 | NaN | submission | 12 | justiceforjohnnydepp_johnny_depp_fans_donate_40k | 6 | [] | 0 |
| 4 | t3_muypet | /r/entertainment/comments/muypet/justiceforjoh... | #JusticeForJohnnyDepp Johnny Depp fans donate ... | NaN | r/entertainment | Truthbetheprejudice | 2021-04-20 19:48:06 | Neutral | Negative | 1.0 | NaN | submission | 12 | justiceforjohnnydepp_johnny_depp_fans_donate_40k | 6 | [] | 0 |
# Which subreddits Truthbetheprejudice posted in most often.
df_truth['subreddit'].value_counts().head(10)
r/entertainment 10 r/MensRights 10 r/movies 10 r/pussypassdenied 8 r/JusticeForJohnnyDepp 5 Name: subreddit, dtype: int64
# Most repeated Truthbetheprejudice submission titles.
df_truth['text'].value_counts().head(10)
Johnny Depp sues ACLU on to see if ex-wife Amber Heard gave $7m divorce settlement 5 Amber Heard Is Allegedly Being Investigated By LAPD For Perjury, Could Face Jail Time Over Johnny Depp Domestic Violence Accusations 5 Amber Heard probed for perjury and staging 2016 Johnny Depp domestic violence case: 'Lock her up' 5 Amber Heard Under Investigation for Perjury in Johnny Depp Domestic Violence Case 5 Amber Heard to struggle in new LAPD domestic violence probe 5 #JusticeForJohnnyDepp Johnny Depp fans donate $40k to children that Amber Heard neglected 4 Remove Amber Heard from Aquaman 2 4 Confirmed: Amber Heard Under Criminal Investigation in Australia 4 Big tech silence Johnny Depp and censor Amber Heard evidence 3 Johnny Depp Makes an Appearance in Spain as Lawyers Drop New Evidence in Amber Heard Case 2 Name: text, dtype: int64
# Count Truthbetheprejudice's contributions per calendar day.
df_truth_contributions = (
    df_truth.groupby(df_truth.created_at.dt.date)
            .size()
            .reset_index(name='n_contributions')
)
df_truth_contributions
| created_at | n_contributions | |
|---|---|---|
| 0 | 2021-04-18 | 3 |
| 1 | 2021-04-20 | 4 |
| 2 | 2021-04-29 | 4 |
| 3 | 2021-04-30 | 4 |
| 4 | 2021-05-07 | 8 |
| 5 | 2021-05-08 | 5 |
| 6 | 2021-05-09 | 5 |
| 7 | 2021-05-10 | 5 |
| 8 | 2021-05-19 | 5 |
# Daily contribution bar chart for Truthbetheprejudice.
fig = px.bar(
    df_truth_contributions,
    x='created_at',
    y='n_contributions',
    title='The number of "Truthbetheprejudice" contributions in 2021',
)
# One tick per contribution date so sparse days remain visible.
fig.update_layout(
    xaxis=dict(
        title='Contribution Date',
        tickmode='array',
        tickvals=df_truth_contributions.created_at,
    )
)
fig.update_traces(marker_color='red', marker_line_width=2, opacity=1,
                  textposition='auto')
fig.show()
voting in a positive way
# Isolate gaul66's contributions.
df_gaul = df[df['author'] == 'gaul66'].reset_index(drop=True)
print(df_gaul.shape)
df_gaul.head()
(37, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | t1_ghobzuy | /r/CelebBattles/comments/ko1duk/marvel_vs_dc_t... | Marvel | t3_ko1duk | r/CelebBattles | gaul66 | 2021-01-01 04:29:09 | Neutral | Positive | 8.0 | submission | comment | 1 | marvel_vs_dc_team_marvel_evangeline_lilly_brie | 8 | [] | 0 |
| 1 | t1_ghuwup4 | /r/CelebBattles/comments/kp2qmx/rachel_mcadams... | Rachel | t3_kp2qmx | r/CelebBattles | gaul66 | 2021-01-02 20:16:31 | Neutral | Neutral | 2.0 | submission | comment | 1 | rachel_mcadams_vs_amber_heard | 5 | [] | 0 |
| 2 | t1_giqmms5 | /r/CelebBattles/comments/ku6peu/hotter_one_gal... | **Gal** | t3_ku6peu | r/CelebBattles | gaul66 | 2021-01-10 06:10:19 | Neutral | Neutral | 5.0 | submission | comment | 1 | hotter_one_gal_gadot_vs_amber_heard_vs_alison_... | 10 | [] | 0 |
| 3 | t1_gk1f28y | /r/CelebBattles/comments/l1qwsh/my_top_6random... | Natalie | t3_l1qwsh | r/CelebBattles | gaul66 | 2021-01-21 07:10:16 | Neutral | Neutral | 1.0 | submission | comment | 1 | my_top_6random_order_natalie_portman_vs_emily | 8 | [] | 0 |
| 4 | t1_gkf1t2n | /r/CelebBattles/comments/l3eo1l/katy_perry_vs_... | **Evangeline** | t3_l3eo1l | r/CelebBattles | gaul66 | 2021-01-23 18:03:01 | Neutral | Neutral | 2.0 | submission | comment | 1 | katy_perry_vs_evangeline_lilly_vs_amber_heard | 8 | [] | 0 |
# gaul66 only ever posted in one subreddit.
df_gaul['subreddit'].value_counts().head(10)
r/CelebBattles 37 Name: subreddit, dtype: int64
# Spot-check one permalink from the gaul66 subset.
df_gaul.permalink.iloc[5]
'/r/CelebBattles/comments/l4h1wp/amber_heard_vs_brie_larson/gkpl5eg/'
# Look up when this account was created.
# (Equivalent single-value lookup:
#  df_users.set_index('user_name').at['LoveAmberHeard42286', 'user_created_at'])
df_users[df_users['user_name'] == 'LoveAmberHeard42286']
| user_name | has_verified_email | is_mod | is_gold | is_banned | comment_karma | link_karma | user_created_at | banned_unverified | creation_year | |
|---|---|---|---|---|---|---|---|---|---|---|
| 65068 | LoveAmberHeard42286 | True | True | False | False | 146.0 | 3388.0 | 2021-01-07 12:34:04 | others | 2021 |
# Isolate LoveAmberHeard42286's contributions.
df_love = df[df['author'] == 'LoveAmberHeard42286'].reset_index(drop=True)
print(df_love.shape)
df_love.head()
(124, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | t3_kul0ox | /r/celebgifs/comments/kul0ox/amber_heard_enjoy... | Amber Heard enjoying some entertainment | NaN | r/celebgifs | LoveAmberHeard42286 | 2021-01-10 19:31:39 | Positive | Positive | 0.0 | NaN | submission | 5 | amber_heard_enjoying_some_entertainment | 5 | [] | 0 |
| 1 | t1_gishb5o | /r/gentlemanboners/comments/kffqt2/amber_heard... | Stunning | t3_kffqt2 | r/gentlemanboners | LoveAmberHeard42286 | 2021-01-10 19:36:30 | Positive | Positive | 3.0 | submission | comment | 1 | amber_heard | 2 | [] | 0 |
| 2 | t3_kul5v4 | /r/gentlemanboners/comments/kul5v4/amber_heard/ | Amber Heard | NaN | r/gentlemanboners | LoveAmberHeard42286 | 2021-01-10 19:38:05 | Neutral | Neutral | 0.0 | NaN | submission | 2 | amber_heard | 2 | [] | 0 |
| 3 | t1_gismviy | /r/unpopularopinion/comments/jps1eu/johnny_dep... | He did. Hence why he lost his case. They had r... | t3_jps1eu | r/unpopularopinion | LoveAmberHeard42286 | 2021-01-10 20:22:23 | Negative | Negative | 2.0 | submission | comment | 59 | johnny_depp_probably_did_physically_abuse_amber | 7 | [] | 0 |
| 4 | t1_gisn0sb | /r/unpopularopinion/comments/ht1ota/amber_hear... | ... 5 months later. Looks like the sun wasn't ... | t3_ht1ota | r/unpopularopinion | LoveAmberHeard42286 | 2021-01-10 20:23:33 | Neutral | Positive | 2.0 | submission | comment | 12 | amber_heard_and_johnny_depp_both_suck | 7 | [] | 0 |
# Which subreddits LoveAmberHeard42286 posted in most often.
df_love['subreddit'].value_counts().head(10)
r/JerkOffToCelebs 85 r/Celebhub 26 r/gentlemanboners 4 r/unpopularopinion 2 u/LoveAmberHeard42286 2 r/Celebrityfemdom 1 r/Celebs 1 r/Amber_Heard 1 r/IStandWithAmberHeard 1 r/celebgifs 1 Name: subreddit, dtype: int64
# Most repeated LoveAmberHeard42286 texts.
df_love['text'].value_counts().head(10)
Amber Heard 72 Amber Heard 💜 6 Amber Heard ❤ 5 She's an abuse victim as multiple judges have agreed on based on evidence. And what you're talking about is rape. You need to take a look at yourself before casting stones at someone else. 2 Didn't do anything wrong? That must he why he lost his case. 1 The only trash is Her wino ex husband. Fuck that loser. 1 i love me some Amber Heard 1 Maybe he should have tried to not be an abusive crackhead 1 ... 5 months later. Looks like the sun wasn't lying for once. 1 Go jump back up your mother and cook some more, retard. 1 Name: text, dtype: int64
# Count LoveAmberHeard42286's contributions per calendar day and plot them.
df_love_contributions = (
    df_love.groupby(df_love.created_at.dt.date)
           .size()
           .reset_index(name='n_contributions')
)
fig = px.bar(
    df_love_contributions,
    x='created_at',
    y='n_contributions',
    title='The number of "LoveAmberHeard42286" contributions in 2021',
)
fig.update_layout(
    xaxis=dict(
        title='Contribution Date',
        tickmode='array',
        # BUG FIX: tickvals previously referenced df_stanley_contributions
        # (copy-paste from the Stanley_Elkind plot), which mislabeled the
        # x-axis ticks. Use this user's own contribution dates.
        tickvals=df_love_contributions.created_at,
    )
)
fig.update_traces(marker_color='red', marker_line_width=2, opacity=1)
fig.show()
# Busiest days first.
df_love_contributions.sort_values(by='n_contributions', ascending=False)
| created_at | n_contributions | |
|---|---|---|
| 17 | 2021-03-15 | 9 |
| 11 | 2021-03-08 | 8 |
| 16 | 2021-03-14 | 7 |
| 33 | 2021-05-02 | 7 |
| 24 | 2021-04-14 | 7 |
| 23 | 2021-04-03 | 5 |
| 0 | 2021-01-10 | 5 |
| 27 | 2021-04-22 | 5 |
| 6 | 2021-02-27 | 4 |
| 12 | 2021-03-09 | 4 |
| 18 | 2021-03-16 | 4 |
| 28 | 2021-04-23 | 3 |
| 26 | 2021-04-21 | 3 |
| 29 | 2021-04-24 | 3 |
| 30 | 2021-04-28 | 3 |
| 31 | 2021-04-29 | 3 |
| 19 | 2021-03-28 | 3 |
| 42 | 2021-05-31 | 3 |
| 9 | 2021-03-04 | 3 |
| 15 | 2021-03-13 | 2 |
| 8 | 2021-03-03 | 2 |
| 40 | 2021-05-26 | 2 |
| 38 | 2021-05-13 | 2 |
| 37 | 2021-05-11 | 2 |
| 2 | 2021-02-19 | 2 |
| 3 | 2021-02-20 | 2 |
| 7 | 2021-03-02 | 2 |
| 4 | 2021-02-23 | 2 |
| 25 | 2021-04-19 | 2 |
| 20 | 2021-03-29 | 2 |
| 5 | 2021-02-24 | 1 |
| 14 | 2021-03-11 | 1 |
| 10 | 2021-03-07 | 1 |
| 32 | 2021-04-30 | 1 |
| 13 | 2021-03-10 | 1 |
| 34 | 2021-05-04 | 1 |
| 35 | 2021-05-09 | 1 |
| 36 | 2021-05-10 | 1 |
| 22 | 2021-04-01 | 1 |
| 1 | 2021-01-20 | 1 |
| 39 | 2021-05-20 | 1 |
| 41 | 2021-05-27 | 1 |
| 21 | 2021-03-31 | 1 |
Negative Comments
# Look up when this account was created.
df_users[df_users['user_name'] == 'sadwook']
| user_name | has_verified_email | is_mod | is_gold | is_banned | comment_karma | link_karma | user_created_at | banned_unverified | creation_year | |
|---|---|---|---|---|---|---|---|---|---|---|
| 43292 | sadwook | True | False | False | False | 131.0 | 21.0 | 2018-12-29 07:46:33 | others | 2018 |
# Isolate sadwook's contributions.
df_sadwook = df[df['author'] == 'sadwook'].reset_index(drop=True)
print(df_sadwook.shape)
df_sadwook.head()
(32, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | t1_gxpnlsu | /r/pussypassdenied/comments/n95rfx/amber_heard... | Good... \n\n"" He said: “The severity of these... | t3_n95rfx | r/pussypassdenied | sadwook | 2021-05-11 10:09:06 | Positive | Negative | 1.0 | submission | comment | 101 | amber_heard_to_struggle_in_new_lapd_domestic | 8 | [] | 0 |
| 1 | t1_gxpnp4c | /r/pussypassdenied/comments/n95rfx/amber_heard... | i will not hold back. JUSTICE FOR JOHNNY | t3_n95rfx | r/pussypassdenied | sadwook | 2021-05-11 10:10:29 | Neutral | Negative | 2.0 | submission | comment | 8 | amber_heard_to_struggle_in_new_lapd_domestic | 8 | [] | 0 |
| 2 | t1_gxpntzg | /r/pussypassdenied/comments/n95rfx/amber_heard... | we wont stay silent. this mans reputation is g... | t1_gxnoecm | r/pussypassdenied | sadwook | 2021-05-11 10:12:32 | Positive | Neutral | 1.0 | comment | comment | 34 | amber_heard_to_struggle_in_new_lapd_domestic | 8 | [] | 0 |
| 3 | t1_gxpnxf1 | /r/pussypassdenied/comments/n95rfx/amber_heard... | pretty much the summary of her career. im not ... | t1_gxoh4o0 | r/pussypassdenied | sadwook | 2021-05-11 10:13:55 | Negative | Neutral | 3.0 | comment | comment | 24 | amber_heard_to_struggle_in_new_lapd_domestic | 8 | [] | 0 |
| 4 | t1_gxpnzbf | /r/pussypassdenied/comments/n95rfx/amber_heard... | bruh | t1_gxn7p1k | r/pussypassdenied | sadwook | 2021-05-11 10:14:43 | Neutral | Neutral | 0.0 | comment | comment | 1 | amber_heard_to_struggle_in_new_lapd_domestic | 8 | [] | 0 |
# sadwook's activity is confined to a single subreddit.
df_sadwook['subreddit'].value_counts().head(10)
r/pussypassdenied 32 Name: subreddit, dtype: int64
# Count sadwook's contributions per calendar day.
df_sadwook_contributions = (
    df_sadwook.groupby(df_sadwook.created_at.dt.date)
              .size()
              .reset_index(name='n_contributions')
)
df_sadwook_contributions
| created_at | n_contributions | |
|---|---|---|
| 0 | 2021-05-11 | 32 |
# Daily contribution bar chart for sadwook.
fig = px.bar(
    df_sadwook_contributions,
    x='created_at',
    y='n_contributions',
    title='The number of "sadwook" contributions in 2021',
)
fig.update_traces(marker_color='red', marker_line_width=2, opacity=1,
                  textposition='auto')
fig.show()
It's weird!!
All of this user's 2021 contributions (32) were made on the same day, 2021-05-11.
The account was created on 2018-12-29 — the same date as the 2018 account-creation peak!
# Count sadwook's contributions per hour of the day.
df_sadwook_hrs = (
    df_sadwook.groupby(df_sadwook.created_at.dt.hour)
              .size()
              .reset_index(name='n_contributions')
)
df_sadwook_hrs
| created_at | n_contributions | |
|---|---|---|
| 0 | 10 | 19 |
| 1 | 11 | 13 |
# Hourly distribution of sadwook's single-day burst of activity.
fig = px.bar(
    df_sadwook_hrs,
    x='created_at',
    y='n_contributions',
    # BUG FIX: the title was copy-pasted from the daily plot above; this
    # chart shows contributions per hour of day, not per day.
    title='The number of "sadwook" contributions per hour in 2021',
)
# Label the axes so the hour-of-day x values are not mistaken for dates.
fig.update_layout(xaxis_title='hour of day',
                  yaxis_title='number of contributions')
fig.update_traces(marker_color='red',
                  marker_line_width=2, opacity=1, textposition='auto')
fig.show()
Positive Comments
# Isolate Beatplayer's contributions.
df_beatplayer = df[df['author'] == 'Beatplayer'].reset_index(drop=True)
print(df_beatplayer.shape)
df_beatplayer.head()
(32, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | t1_gpl1ocz | /r/TrueOffMyChest/comments/lx2s7w/its_disgusti... | Wait wait. Has something changed here. Because... | t3_lx2s7w | r/TrueOffMyChest | Beatplayer | 2021-03-03 22:00:23 | Negative | Neutral | 36.0 | submission | comment | 36 | its_disgusting_that_people_are_less_angry_about | 8 | [] | 0 |
| 1 | t1_gpl5mw9 | /r/TrueOffMyChest/comments/lx2s7w/its_disgusti... | But then again, that was just a statement made... | t1_gpl5997 | r/TrueOffMyChest | Beatplayer | 2021-03-03 22:30:38 | Positive | Neutral | -1.0 | comment | comment | 86 | its_disgusting_that_people_are_less_angry_about | 8 | [] | 0 |
| 2 | t1_gpl71q8 | /r/TrueOffMyChest/comments/lx2s7w/its_disgusti... | Like most abusive relationships tbh, from the ... | t1_gpl6uww | r/TrueOffMyChest | Beatplayer | 2021-03-03 22:41:44 | Positive | Neutral | -2.0 | comment | comment | 26 | its_disgusting_that_people_are_less_angry_about | 8 | [] | 0 |
| 3 | t1_gpla7k5 | /r/TrueOffMyChest/comments/lx2s7w/its_disgusti... | K. So I’m 12.5 mins in, and absolutely shatter... | t1_gpl8d57 | r/TrueOffMyChest | Beatplayer | 2021-03-03 23:07:01 | Negative | Negative | 20.0 | comment | comment | 109 | its_disgusting_that_people_are_less_angry_about | 8 | [] | 0 |
| 4 | t1_gplbi77 | /r/TrueOffMyChest/comments/lx2s7w/its_disgusti... | I’m not sure that the case to be honest. \n\nI... | t1_gpl9u5x | r/TrueOffMyChest | Beatplayer | 2021-03-03 23:17:45 | Negative | Neutral | 15.0 | comment | comment | 149 | its_disgusting_that_people_are_less_angry_about | 8 | [] | 0 |
# Spot-check the first permalink from the Beatplayer subset.
df_beatplayer.permalink.iloc[0]
'/r/TrueOffMyChest/comments/lx2s7w/its_disgusting_that_people_are_less_angry_about/gpl1ocz/'
# Beatplayer's activity is confined to a single subreddit.
df_beatplayer['subreddit'].value_counts().head(10)
r/TrueOffMyChest 32 Name: subreddit, dtype: int64
# Sample of Beatplayer's comments.
df_beatplayer['text'].value_counts().head(3)
So it’s nearly 12 and I’ll be back tomorrow to read this :) Sleep well! 1 I’m not sure that the case to be honest. \n\nIt was proven in a court, on the balance of probabilities, that a media outlet was justified in calling JD domestically abusive. They couldn’t say the same for me, for instance, because I’m not domestically abusive. \n\nSimilarly in the civil case, there were a number of preliminary rulings that found, on the balance of probabilities, that there was enough evidence of abuse to out protective measures in place. Again, the same couldn’t be said for me for instance, because there is no evidence of abusing behaviour. He then, quite sensibly settled. Ya know, because he would have lost. Because of the evidence. \n\nIt’s reaching a little bit to say that he hasn’t lost a case. Although I see roughly where you’re coming from. \n\nI suspect it would be manifestly a bad move for him to sue her. Is that happening? 1 ‘Obviously, most domestic abuse is committed by men’ \n\n- what makes it obvious? Is it that DV is a male problem, and you just don’t like seeing it written in an English? \n\n‘Women can also rape men’ \n- No no, they genuinely can’t. They can be secondaries to a crime of rape (and interestingly, women who are convicted of any type of sexual or violent crime are, almost without exception, secondaries to a male perpetrator) but they absolutely cannot be rapists. Unless you’re making a very forward thinking point about the trans community. Which I absolutely don’t think you are. \n\nTo be honest, if I was a big ole manly testosterone driven drug addled abuser, I’d be petrified of a women who was brave enough to document my behaviour. I’d probably release all sorts of unevidenced wank to the media, which would probably be lapped up by a wide range of misogynist internet commenters. I’d probably be fairly successful in it. And I’d also probably be cognisant that that was part of the spectrum of abuse I was putting my ex-partner through. 
1 Name: text, dtype: int64
Check whether the users with the most contributions are mods, gold members, or have a verified email¶
# Top 25 contributors by volume.
df['author'].value_counts().nlargest(25)
-banned- 1587 AutoModerator 515 CelebBattleVoteBot 163 LoveAmberHeard42286 124 charliedba 99 Stanley_Elkind 44 Truthbetheprejudice 43 gaul66 37 Beatplayer 32 sadwook 32 AltruisticVariation4 31 YahYah241 31 faithle55 30 amphibiousParakeet 29 Jreal22 28 BalonSwann07 26 masseffect2001 25 Thankunext4 25 DutchArtworks 24 swampwitch116 24 EdwardCircumcizehand 24 Aphaelo 23 cptn3sm10 23 the-speed-of-pain 22 my_alt_account1312 22 Name: author, dtype: int64
# Top contributors to investigate; index 0 is the '-banned-' placeholder, so drop it.
check_list = list(df.author.value_counts().nlargest(n=25).index[1:])
check_list
['AutoModerator', 'CelebBattleVoteBot', 'LoveAmberHeard42286', 'charliedba', 'Stanley_Elkind', 'Truthbetheprejudice', 'gaul66', 'Beatplayer', 'sadwook', 'AltruisticVariation4', 'YahYah241', 'faithle55', 'amphibiousParakeet', 'Jreal22', 'BalonSwann07', 'masseffect2001', 'Thankunext4', 'DutchArtworks', 'swampwitch116', 'EdwardCircumcizehand', 'Aphaelo', 'cptn3sm10', 'the-speed-of-pain', 'my_alt_account1312']
# Get the account records for the most active (negative-leaning) contributors.
df_check = df_users.query("user_name in @check_list")
print(df_check.shape)
df_check.head(2)
(24, 10)
| user_name | has_verified_email | is_mod | is_gold | is_banned | comment_karma | link_karma | user_created_at | banned_unverified | creation_year | |
|---|---|---|---|---|---|---|---|---|---|---|
| 4432 | AutoModerator | True | True | True | False | 1000.0 | 1000.0 | 2012-01-05 05:24:28 | others | others |
| 8326 | faithle55 | True | False | False | False | 328449.0 | 482.0 | 2013-01-06 22:13:46 | others | others |
# Sanity check: every name in check_list matched exactly one users-table row.
df_check['user_name'].nunique()
24
# Summarize every account attribute (except identifiers/timestamps) for the top contributors.
skip_columns = {'user_name', 'user_created_at'}
for column in df_check.columns:
    if column in skip_columns:
        continue
    print('The value counts of the users with the most contributions: ' + column)
    print(df_check[column].value_counts())
    print('\n')
The value counts of the users with the most contributions: has_verified_email True 24 Name: has_verified_email, dtype: int64 The value counts of the users with the most contributions: is_mod False 14 True 10 Name: is_mod, dtype: int64 The value counts of the users with the most contributions: is_gold False 18 True 6 Name: is_gold, dtype: int64 The value counts of the users with the most contributions: is_banned False 20 True 4 Name: is_banned, dtype: int64 The value counts of the users with the most contributions: comment_karma 100113.0 1 146.0 1 1000.0 1 1359.0 1 16915.0 1 13977.0 1 5608.0 1 131.0 1 27642.0 1 405.0 1 25602.0 1 1916.0 1 18790.0 1 1436.0 1 22104.0 1 1206.0 1 328449.0 1 1063.0 1 17395.0 1 88814.0 1 Name: comment_karma, dtype: int64 The value counts of the users with the most contributions: link_karma 331.0 1 3388.0 1 1000.0 1 32197.0 1 30001.0 1 1127.0 1 1296.0 1 301.0 1 1138.0 1 108.0 1 482.0 1 2985.0 1 51.0 1 10931.0 1 19887.0 1 1439.0 1 21.0 1 14912.0 1 20315.0 1 1.0 1 Name: link_karma, dtype: int64 The value counts of the users with the most contributions: banned_unverified others 20 banned 4 Name: banned_unverified, dtype: int64 The value counts of the users with the most contributions: creation_year 2019 6 2021 5 banned 4 2020 4 others 3 2018 2 Name: creation_year, dtype: int64
# pd.set_option('display.max_colwidth', None)
# Collects suspicious accounts found during the analysis (name -> evidence note).
suspected_dict = {}
# Contributions whose text contains the word "fuck" (case-insensitive).
# regex=False: plain substring match; na=False: NaN text must not poison the boolean mask.
df_fuc = df[df.text.str.lower().str.contains('fuck', regex=False, na=False)]
print(df_fuc.shape)
df_fuc.head()
(1227, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 54 | t1_gho9bpc | /r/CelebBattles/comments/ko1duk/marvel_vs_dc_t... | Fuck a nd I thought I didn't have a life LOL | t1_gho5bap | r/CelebBattles | Beav365 | 2021-01-01 03:57:09 | Positive | Negative | -2.0 | comment | comment | 11 | marvel_vs_dc_team_marvel_evangeline_lilly_brie | 8 | [] | 0 |
| 88 | t1_ghoms5q | /r/redditmoment/comments/ko1xfy/delete_tik_tok... | Shut the fuck up about this stealing your priv... | t1_ghoe5zj | r/redditmoment | big-shaq-skrra | 2021-01-01 06:54:36 | Negative | Negative | 56.0 | comment | comment | 28 | delete_tik_tok_or_slap_amber_heard | 7 | [] | 0 |
| 105 | t1_ghooy78 | /r/redditmoment/comments/ko1xfy/delete_tik_tok... | What the fuck | t1_ghon57f | r/redditmoment | Bombz_Armed | 2021-01-01 07:28:39 | Negative | Negative | 55.0 | comment | comment | 3 | delete_tik_tok_or_slap_amber_heard | 7 | [] | 0 |
| 110 | t1_ghopc1o | /r/Teenager/comments/knh0wt/just_cuz_i_wanna_k... | who the fuck voted ofr amber heard | t3_knh0wt | r/Teenager | matthew35433ma | 2021-01-01 07:34:53 | Negative | Negative | 1.0 | submission | comment | 7 | just_cuz_i_wanna_know_how_many_of_you_support | 10 | [] | 0 |
| 142 | t1_ghot913 | /r/redditmoment/comments/ko1xfy/delete_tik_tok... | I'd fuck her for sure | t3_ko1xfy | r/redditmoment | FatHandNoticer | 2021-01-01 08:45:21 | Positive | Neutral | -11.0 | submission | comment | 5 | delete_tik_tok_or_slap_amber_heard | 7 | [] | 0 |
# Authors of submissions that all share the exact title "fuck_amber_heard".
is_target_title = df['submission_text'].eq('fuck_amber_heard')
is_submission = df['submission_comment'].eq('submission')
df_sub = df[is_target_title & is_submission]
print(df_sub.shape)
with pd.option_context('display.max_colwidth', None):
    display(df_sub.head())
(7, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 684 | t3_kpbsbr | /r/Animemes/comments/kpbsbr/fuck_amber_heard/ | Fuck amber heard | NaN | r/Animemes | dingusbob69 | 2021-01-03 02:38:42 | Negative | Negative | 269.0 | NaN | submission | 3 | fuck_amber_heard | 3 | [] | 0 |
| 889 | t3_kq9lal | /r/memes/comments/kq9lal/fuck_amber_heard/ | fuck Amber Heard | NaN | r/memes | guyinAmerica1 | 2021-01-04 14:18:40 | Negative | Negative | 1.0 | NaN | submission | 3 | fuck_amber_heard | 3 | [] | 0 |
| 1404 | t3_kriljq | /r/JusticeForJohnnyDepp/comments/kriljq/fuck_amber_heard/ | fuck amber heard | NaN | r/JusticeForJohnnyDepp | isaac0304 | 2021-01-06 06:59:05 | Negative | Negative | 33.0 | NaN | submission | 3 | fuck_amber_heard | 3 | [] | 0 |
| 1853 | t3_ktkm8u | /r/EntitledBitch/comments/ktkm8u/fuck_amber_heard/ | Fuck amber heard | NaN | r/EntitledBitch | big_pog_human2478 | 2021-01-09 05:17:34 | Negative | Negative | 14738.0 | NaN | submission | 3 | fuck_amber_heard | 3 | [] | 0 |
| 2389 | t3_kukxmy | /r/SupportAmberHeard/comments/kukxmy/fuck_amber_heard/ | FUCK AMBER HEARD | NaN | r/SupportAmberHeard | Flerp6969 | 2021-01-10 19:28:00 | Negative | Negative | 54.0 | NaN | submission | 3 | fuck_amber_heard | 3 | [] | 0 |
# Broader filter: submissions whose title contains "fuck" anywhere.
# regex=False: plain substring match; na=False: NaN titles must not poison the boolean mask.
mask = (df['submission_text'].str.contains('fuck', regex=False, na=False)) & (df['submission_comment'] == 'submission')
df_sub_fuc = df[mask]
print(df_sub_fuc.shape)
with pd.option_context('display.max_colwidth', None):
    display(df_sub_fuc.head())
(49, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 183 | t3_ko9ymn | /r/memes/comments/ko9ymn/fuck_amber_heard_credit_to_uwhitewolf699420_for/ | Fuck Amber Heard (credit to u/Whitewolf699420 for the template) | NaN | r/memes | -banned- | 2021-01-01 11:39:40 | Negative | Negative | 1.0 | NaN | submission | 9 | fuck_amber_heard_credit_to_uwhitewolf699420_for | 7 | [] | 0 |
| 186 | t3_ko9zmj | /r/dankmemes/comments/ko9zmj/fuck_amber_heard_credit_to_uwhitewolf699420_for/ | Fuck Amber Heard (credit to u/Whitewolf699420 for template) | NaN | r/dankmemes | -banned- | 2021-01-01 11:41:46 | Negative | Negative | 1.0 | NaN | submission | 8 | fuck_amber_heard_credit_to_uwhitewolf699420_for | 7 | [] | 0 |
| 684 | t3_kpbsbr | /r/Animemes/comments/kpbsbr/fuck_amber_heard/ | Fuck amber heard | NaN | r/Animemes | dingusbob69 | 2021-01-03 02:38:42 | Negative | Negative | 269.0 | NaN | submission | 3 | fuck_amber_heard | 3 | [] | 0 |
| 889 | t3_kq9lal | /r/memes/comments/kq9lal/fuck_amber_heard/ | fuck Amber Heard | NaN | r/memes | guyinAmerica1 | 2021-01-04 14:18:40 | Negative | Negative | 1.0 | NaN | submission | 3 | fuck_amber_heard | 3 | [] | 0 |
| 1404 | t3_kriljq | /r/JusticeForJohnnyDepp/comments/kriljq/fuck_amber_heard/ | fuck amber heard | NaN | r/JusticeForJohnnyDepp | isaac0304 | 2021-01-06 06:59:05 | Negative | Negative | 33.0 | NaN | submission | 3 | fuck_amber_heard | 3 | [] | 0 |
# Count the "f*ck"-titled submissions per calendar day and plot them over 2021.
post_dates = df_sub_fuc.created_at.dt.date
df_sub_fuc_contributions = df_sub_fuc.groupby(post_dates).size().reset_index(name='n_contributions')
fig = px.bar(df_sub_fuc_contributions, x='created_at', y='n_contributions',
             title='The number of submissions with the word "F*CK" in 2021')
fig.update_traces(marker_color='red', marker_line_width=2, opacity=1, textposition='auto')
fig.show()
# Peak days first.
df_sub_fuc_contributions.sort_values('n_contributions', ascending=False)
| created_at | n_contributions | |
|---|---|---|
| 24 | 2021-03-29 | 4 |
| 6 | 2021-01-10 | 3 |
| 0 | 2021-01-01 | 2 |
| 14 | 2021-02-01 | 2 |
| 27 | 2021-04-14 | 2 |
| 26 | 2021-04-08 | 2 |
| 23 | 2021-03-28 | 2 |
| 19 | 2021-03-05 | 2 |
| 34 | 2021-05-19 | 2 |
| 8 | 2021-01-16 | 2 |
| 7 | 2021-01-13 | 2 |
| 9 | 2021-01-18 | 1 |
| 5 | 2021-01-09 | 1 |
| 33 | 2021-05-07 | 1 |
| 32 | 2021-04-29 | 1 |
| 31 | 2021-04-27 | 1 |
| 30 | 2021-04-23 | 1 |
| 29 | 2021-04-22 | 1 |
| 28 | 2021-04-15 | 1 |
| 2 | 2021-01-04 | 1 |
| 3 | 2021-01-06 | 1 |
| 25 | 2021-04-01 | 1 |
| 4 | 2021-01-07 | 1 |
| 22 | 2021-03-13 | 1 |
| 10 | 2021-01-20 | 1 |
| 21 | 2021-03-10 | 1 |
| 20 | 2021-03-08 | 1 |
| 18 | 2021-02-22 | 1 |
| 1 | 2021-01-03 | 1 |
| 16 | 2021-02-12 | 1 |
| 15 | 2021-02-09 | 1 |
| 13 | 2021-01-30 | 1 |
| 12 | 2021-01-29 | 1 |
| 11 | 2021-01-28 | 1 |
| 17 | 2021-02-18 | 1 |
# Which authors post "f*ck" contributions most often.
df_fuc.author.value_counts().head(10)
-banned- 86 Jreal22 10 Loveseeingthatsmile 6 AutoModerator 5 VampireQueenDespair 4 zephyrBoom72 4 thesecoloursdontrun 4 blackweebow 4 HornyJit 4 Stanley_Elkind 4 Name: author, dtype: int64
# Split of "f*ck" contributions between comments and submissions.
df_fuc.submission_comment.value_counts()
comment 1150 submission 77 Name: submission_comment, dtype: int64
# Subreddits where the "f*ck" contributions concentrate.
df_fuc.subreddit.value_counts().head(10)
r/JerkOffToCelebs 188 r/entertainment 154 r/pussypassdenied 151 r/MensRights 82 r/TrueOffMyChest 66 r/iamatotalpieceofshit 61 r/CelebAssPussyMouth 27 r/awfuleverything 27 r/DC_Cinematic 27 r/AskReddit 26 Name: subreddit, dtype: int64
# Calendar days with the most "f*ck" contributions.
df_fuc.created_at.dt.date.value_counts().head(10)
2021-04-17 73 2021-02-28 72 2021-03-04 60 2021-05-11 45 2021-02-20 40 2021-01-16 34 2021-05-08 25 2021-04-18 25 2021-04-01 24 2021-01-01 23 Name: created_at, dtype: int64
# Daily volume of all "f*ck" contributions (comments + submissions) in 2021.
by_day = df_fuc.groupby(df_fuc.created_at.dt.date).size()
df_fuc_contributions = by_day.reset_index(name='n_contributions')
fig = px.bar(df_fuc_contributions, x='created_at', y='n_contributions',
             title='The number of "F*CK" contributions in 2021')
fig.update_traces(marker_color='red', marker_line_width=2, opacity=1, textposition='auto')
fig.show()
# Peak days first.
df_fuc_contributions.sort_values('n_contributions', ascending=False).head(10)
| created_at | n_contributions | |
|---|---|---|
| 92 | 2021-04-17 | 73 |
| 54 | 2021-02-28 | 72 |
| 58 | 2021-03-04 | 60 |
| 115 | 2021-05-11 | 45 |
| 46 | 2021-02-20 | 40 |
| 15 | 2021-01-16 | 34 |
| 112 | 2021-05-08 | 25 |
| 93 | 2021-04-18 | 25 |
| 81 | 2021-04-01 | 24 |
| 0 | 2021-01-01 | 23 |
The user "Jreal22" used the word f*ck 10 times.
Negative Comments
# All 2021 contributions made by the account "Jreal22".
df_jeral = df[df.author == 'Jreal22']
print(df_jeral.shape)
df_jeral.head()
(28, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 13069 | t1_guu4ilo | /r/entertainment/comments/msgoz8/johnny_depp_r... | She was feeding him Xanax constantly too, you ... | t1_gutc2jp | r/entertainment | Jreal22 | 2021-04-17 11:43:17 | Negative | Neutral | 60.0 | comment | comment | 76 | johnny_depp_releases_lapd_bodycam_footage_proving | 7 | [] | 0 |
| 13073 | t1_guu4sfw | /r/entertainment/comments/msgoz8/johnny_depp_r... | Lots of people have their lines fed to them, i... | t1_gutd28y | r/entertainment | Jreal22 | 2021-04-17 11:45:13 | Negative | Negative | 6.0 | comment | comment | 49 | johnny_depp_releases_lapd_bodycam_footage_proving | 7 | [] | 0 |
| 13076 | t1_guu57ix | /r/entertainment/comments/msgoz8/johnny_depp_r... | Yeah, I couldn't believe this.\n\nThey tried t... | t1_guthn8f | r/entertainment | Jreal22 | 2021-04-17 11:48:22 | Positive | Neutral | 23.0 | comment | comment | 58 | johnny_depp_releases_lapd_bodycam_footage_proving | 7 | [] | 0 |
| 13081 | t1_guu60bw | /r/entertainment/comments/msgoz8/johnny_depp_r... | Yeah, Depp needed to stop doing the same movie... | t1_guteoj8 | r/entertainment | Jreal22 | 2021-04-17 11:54:07 | Positive | Positive | 1.0 | comment | comment | 36 | johnny_depp_releases_lapd_bodycam_footage_proving | 7 | [] | 0 |
| 13092 | t1_guu7wa8 | /r/entertainment/comments/msgoz8/johnny_depp_r... | Jay Z should have taken Chris brown out back a... | t1_gutexk2 | r/entertainment | Jreal22 | 2021-04-17 12:07:33 | Negative | Neutral | 8.0 | comment | comment | 39 | johnny_depp_releases_lapd_bodycam_footage_proving | 7 | [] | 0 |
# Inspect Jreal22's most frequent texts (all appear once — no copy-paste spam).
df_jeral.text.value_counts().head(3)
Yeah, Depp needed to stop doing the same movies, he is super talented and could have an Oscar by now easily.\n\nBut he got to making 100 million a movie and fked it up a bit. 1 If you listen to the tapes he actually seems like a really nice guy, he tries so much to calm her down and whatnot. 1 I dated a woman who would slap the shit out of me and beg me to hit her back. I'm like, I'm never going to hit you back, she drank, and I didn't.\n\nThen one time things started getting really weird and she stood me up one night.\n\nI was like, what's up? And she bust out crying and said her dad had died.\n\nI felt terrible, so I was real nice to her that week, then I told my mom just in normal convo that her dad had died, so my mom called her mom to send her condolences for my girlfriends dad dying.\n\nGuess who's dad didn't die?\n\nThat's right, the girl was cheating on me and she forgot about dinner because she was sleeping with another guy, so she made up a lie THAT HER DAD WAS DEAD just to get out of why she stood me up for dinner.\n\nI called her mom and told her what she did and that I would no longer be dating her and that I was very sorry, but that I wanted to be truthful to her because if she went nuts on me, I wanted a record with everyone that this chick beat me, cheated on me, and told me her dad died when he was alive and well.\n\nI left her and of course she went crazy, but I just kept ignoring her and this was pre social media, so it was easier to disappear.\n\nWatch for the crazy signs, she was ultra hot, but they get on that ultra hot scale and end up being fking crazy. 1 Name: text, dtype: int64
# Record this account as suspicious, with the evidence note for the report.
suspected_dict['Jreal22'] = 'Created at:2019-06-23, used the word f*ck in 28 negative comments in APR 17,18 2021'
# Check this user's account data.
df_users.query(" user_name == 'Jreal22' ")
| user_name | has_verified_email | is_mod | is_gold | is_banned | comment_karma | link_karma | user_created_at | banned_unverified | creation_year | |
|---|---|---|---|---|---|---|---|---|---|---|
| 49493 | Jreal22 | True | False | False | False | 25602.0 | 108.0 | 2019-06-23 14:59:56 | others | 2019 |
# Daily activity profile for "Jreal22", then the most repeated texts overall.
jeral_days = df_jeral.created_at.dt.date
df_jeral_contributions = df_jeral.groupby(jeral_days).size().reset_index(name='n_contributions')
fig = px.bar(df_jeral_contributions, x='created_at', y='n_contributions',
             title='The number of "Jreal22" contributions in 2021')
fig.update_traces(marker_color='red', marker_line_width=2, opacity=1, textposition='auto')
fig.show()
df.text.value_counts().head()
[deleted] 339 Amber Heard 268 ### [Browse JerkOffChallenges](https://jerkofftocelebs.com/actors/) • [Browse Picture Galleries](https://jerkofftocelebs.com/pictures/) • NEW [JerkOffToGermanCelebs](https://reddit.com/r/JerkOffToGermanCelebs/)\n\n\n^(*Thank you for your submission. Make sure to follow the rules.*) \n\n^(*Check out our Website*) ^[*here*](https://jerkofftocelebs.com/). \n\n^(*Join our Discord*) ^[*here*](https://discord.gg/FMhrH2j).\n\n^(*Explore more subreddits*) ^[*here*](https://jerkofftocelebs.com/reddit-nsfw-list/).\n\n\n*I am a bot, and this action was performed automatically. Please [contact the moderators of this subreddit](/message/compose/?to=/r/JerkOffToCelebs) if you have any questions or concerns.* 161 [removed] 122 Rule 9 overused. No Johny Depp or Amber Heard memes at this time\n\n*I am a bot, and this action was performed automatically. Please [contact the moderators of this subreddit](/message/compose/?to=/r/memes) if you have any questions or concerns.* 52 Name: text, dtype: int64
Notes:
This comment is repeated 11 times!!
Two bots detected.
text = "can somebody rp as amber heard for me longterm"
# The same role-play solicitation posted repeatedly across subreddits.
# regex=False: plain substring match; na=False: NaN text must not poison the boolean mask.
df_rp = df[df.text.str.lower().str.contains(text, regex=False, na=False)]
print(df_rp.shape)
with pd.option_context('display.max_colwidth', None):
    display(df_rp.head())
(8, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 8257 | t3_lqz7bz | /r/celebJObuds/comments/lqz7bz/can_somebody_rp_as_amber_heard_for_me_longterm/ | Can Somebody RP as Amber Heard for me longterm? | NaN | r/celebJObuds | -banned- | 2021-02-24 01:12:29 | Neutral | Neutral | 2.0 | NaN | submission | 9 | can_somebody_rp_as_amber_heard_for_me_longterm | 9 | [] | 0 |
| 8341 | t3_lrt0zh | /r/celebJObuds/comments/lrt0zh/can_somebody_rp_as_amber_heard_for_me_longterm/ | Can Somebody RP as Amber Heard for me longterm? | NaN | r/celebJObuds | RoleplayGuy21 | 2021-02-25 01:11:34 | Neutral | Neutral | 6.0 | NaN | submission | 9 | can_somebody_rp_as_amber_heard_for_me_longterm | 9 | [] | 0 |
| 8390 | t3_lsfc05 | /r/celebJObuds/comments/lsfc05/can_somebody_rp_as_amber_heard_for_me_longterm/ | Can Somebody RP as Amber Heard for me longterm? | NaN | r/celebJObuds | RoleplayGuy21 | 2021-02-25 19:57:10 | Neutral | Neutral | 4.0 | NaN | submission | 9 | can_somebody_rp_as_amber_heard_for_me_longterm | 9 | [] | 0 |
| 8417 | t3_lsgxdx | /r/celebJObuds/comments/lsgxdx/can_somebody_rp_as_amber_heard_for_me_longterm/ | Can Somebody RP as Amber Heard for me longterm? | NaN | r/celebJObuds | RoleplayGuy21 | 2021-02-25 21:07:11 | Neutral | Neutral | 14.0 | NaN | submission | 9 | can_somebody_rp_as_amber_heard_for_me_longterm | 9 | [] | 0 |
| 8472 | t3_lt8ipk | /r/celebJObuds/comments/lt8ipk/can_somebody_rp_as_amber_heard_for_me_longterm/ | Can Somebody RP as Amber Heard for me longterm? | NaN | r/celebJObuds | -banned- | 2021-02-26 21:38:12 | Neutral | Neutral | 2.0 | NaN | submission | 9 | can_somebody_rp_as_amber_heard_for_me_longterm | 9 | [] | 0 |
# Where the repeated role-play posts appear.
df_rp.subreddit.value_counts()
r/celebJObuds 5 r/CelebRoleplayJOBuds 2 r/CelebRoleplay 1 Name: subreddit, dtype: int64
# Who posts the repeated role-play text.
df_rp.author.value_counts()
RoleplayTime21 3 RoleplayGuy21 3 -banned- 2 Name: author, dtype: int64
# Account details of the two named repeat posters (both unverified, recently created).
df_users.query("user_name in ['RoleplayGuy21', 'RoleplayTime21']")
| user_name | has_verified_email | is_mod | is_gold | is_banned | comment_karma | link_karma | user_created_at | banned_unverified | creation_year | |
|---|---|---|---|---|---|---|---|---|---|---|
| 64297 | RoleplayGuy21 | False | True | False | False | 1627.0 | 26569.0 | 2020-11-06 18:10:49 | unverified | 2020 |
| 65499 | RoleplayTime21 | False | False | False | False | 220.0 | 1188.0 | 2021-04-14 11:10:20 | unverified | 2021 |
TWO MORE BOTS!!
text = "amber heard bad"
# Contributions containing the phrase "amber heard bad".
# regex=False: plain substring match; na=False: NaN text must not poison the boolean mask.
df_bad = df[df.text.str.lower().str.contains(text, regex=False, na=False)]
print(df_bad.shape)
with pd.option_context('display.max_colwidth', None):
    display(df_bad.head())
(36, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 107 | t1_ghop7t4 | /r/redditmoment/comments/ko1xfy/delete_tik_tok_or_slap_amber_heard/ghop7t4/ | guys amber heard bad now give me upvotes | t3_ko1xfy | r/redditmoment | LupLush | 2021-01-01 07:32:58 | Negative | Negative | 6.0 | submission | comment | 8 | delete_tik_tok_or_slap_amber_heard | 7 | [] | 0 |
| 199 | t1_ghpdwee | /r/redditmoment/comments/ko1xfy/delete_tik_tok_or_slap_amber_heard/ghpdwee/ | Because there's such a weird fixation on this one instance of abuse. It feels like they don't hate on abusers but on Amber Heard specifically. There was a post on madlads recently where someone was called "madlad" for posting "Amber Heard bad" on her subreddit.\n\nAnd let's face it, this is only because she is a prominent case of a female abuser, if she were male she would be hated but nowhere near the degree she is now, and people would throw around claims of false accusations. | t1_ghoztk6 | r/redditmoment | HardcoreTristesse | 2021-01-01 12:27:12 | Negative | Neutral | 2.0 | comment | comment | 87 | delete_tik_tok_or_slap_amber_heard | 7 | [] | 0 |
| 551 | t1_ghth3rk | /r/Amber_Heard/comments/klcvt6/lol_amber_heard_bad/ghth3rk/ | Yes, amber heard bad | t3_klcvt6 | r/Amber_Heard | Affectionate-Ad-2630 | 2021-01-02 11:12:57 | Negative | Neutral | 1.0 | submission | comment | 4 | Amber_Heard | 2 | [] | 0 |
| 954 | t3_kqfpmi | /r/PewdiepieSubmissions/comments/kqfpmi/amber_heard_bad/ | Amber Heard bad | NaN | r/PewdiepieSubmissions | Raven_Force | 2021-01-04 19:22:03 | Negative | Negative | 1.0 | NaN | submission | 3 | amber_heard_bad | 3 | [] | 0 |
| 1006 | t3_kqosqs | /r/memes/comments/kqosqs/amber_heard_bad/ | Amber heard bad | NaN | r/memes | -banned- | 2021-01-05 02:52:16 | Negative | Negative | 1.0 | NaN | submission | 3 | amber_heard_bad | 3 | [] | 0 |
# Authors of the "amber heard bad" posts; '-banned-' dominates.
df_bad.author.value_counts()
-banned- 14 bradje61 3 ReShout876 2 SamGamer012 2 oldmanjenkins51 1 buwud 1 Affectionate-Ad-2630 1 dansonquack 1 Raven_Force 1 Memelord96024 1 DuplicateDestroyer 1 HardcoreTristesse 1 KthulhuX 1 Most-Programmer-1046 1 Sadclocktowernoises 1 LupLush 1 YourMotherSaysHello 1 Lolthisistrash1234 1 skyrise011 1 Name: author, dtype: int64
# Look up account records for these authors, skipping the '-banned-' placeholder at index 0.
bad_list = df_bad.author.value_counts().index.to_list()[1:]
df_users[df_users['user_name'].isin(bad_list)]
| user_name | has_verified_email | is_mod | is_gold | is_banned | comment_karma | link_karma | user_created_at | banned_unverified | creation_year | |
|---|---|---|---|---|---|---|---|---|---|---|
| 21296 | YourMotherSaysHello | True | False | False | False | 246346.0 | 8947.0 | 2016-01-22 16:05:21 | others | others |
| 25031 | oldmanjenkins51 | False | False | False | False | 18274.0 | 25907.0 | 2016-09-16 15:38:57 | unverified | others |
| 31420 | KthulhuX | True | True | False | False | 60055.0 | 4839.0 | 2017-08-24 19:04:43 | others | others |
| 47692 | bradje61 | True | False | False | False | 502.0 | 22828.0 | 2019-04-27 09:01:11 | others | 2019 |
| 49465 | dansonquack | True | True | True | False | 4748.0 | 54495.0 | 2019-06-22 11:26:46 | others | 2019 |
| 51861 | LupLush | True | True | False | False | 3054.0 | 27271.0 | 2019-09-02 17:22:19 | others | 2019 |
| 53243 | HardcoreTristesse | True | False | False | False | 10099.0 | 1352.0 | 2019-10-18 10:22:06 | others | 2019 |
| 53906 | Sadclocktowernoises | True | False | False | False | 9199.0 | 1212.0 | 2019-11-08 16:04:34 | others | 2019 |
| 56762 | SamGamer012 | True | True | False | False | 17070.0 | 10489.0 | 2020-01-30 09:31:35 | others | 2020 |
| 56916 | Memelord96024 | True | False | False | False | 308.0 | 6790.0 | 2020-02-02 20:21:28 | others | 2020 |
| 59969 | skyrise011 | True | False | False | False | 335.0 | 623.0 | 2020-05-20 13:37:53 | others | 2020 |
| 60607 | DuplicateDestroyer | True | True | False | False | 81.0 | 111.0 | 2020-06-13 02:32:27 | others | 2020 |
| 61003 | ReShout876 | True | True | False | False | 2648.0 | 6120.0 | 2020-06-27 19:55:09 | others | 2020 |
| 61302 | Raven_Force | True | False | False | False | 64.0 | 1004.0 | 2020-07-07 22:06:13 | others | 2020 |
| 61746 | Affectionate-Ad-2630 | True | False | False | False | 5.0 | 1.0 | 2020-07-25 10:45:26 | others | 2020 |
| 64662 | Most-Programmer-1046 | True | False | False | False | 40.0 | 682.0 | 2020-11-27 18:36:16 | others | 2020 |
| 64998 | Lolthisistrash1234 | True | True | False | False | 2513.0 | 15701.0 | 2020-12-29 03:18:45 | others | 2020 |
| 70249 | buwud | True | True | True | True | NaN | NaN | NaT | banned | banned |
# Daily counts of "Amber Heard bad" contributions across 2021.
bad_days = df_bad.created_at.dt.date
df_bad_contributions = df_bad.groupby(bad_days).size().reset_index(name='n_contributions')
fig = px.bar(df_bad_contributions, x='created_at', y='n_contributions',
             title='The number of "Amber Heard Bad" contributions in 2021')
fig.update_traces(marker_color='red', marker_line_width=2, opacity=1, textposition='auto')
fig.show()
text = "fuck amber heard"
# Contributions containing the exact phrase "fuck amber heard".
# regex=False: plain substring match; na=False: NaN text must not poison the boolean mask.
df_fuc2 = df[df.text.str.lower().str.contains(text, regex=False, na=False)]
print(df_fuc2.shape)
with pd.option_context('display.max_colwidth', None):
    display(df_fuc2.head())
(102, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 183 | t3_ko9ymn | /r/memes/comments/ko9ymn/fuck_amber_heard_credit_to_uwhitewolf699420_for/ | Fuck Amber Heard (credit to u/Whitewolf699420 for the template) | NaN | r/memes | -banned- | 2021-01-01 11:39:40 | Negative | Negative | 1.0 | NaN | submission | 9 | fuck_amber_heard_credit_to_uwhitewolf699420_for | 7 | [] | 0 |
| 186 | t3_ko9zmj | /r/dankmemes/comments/ko9zmj/fuck_amber_heard_credit_to_uwhitewolf699420_for/ | Fuck Amber Heard (credit to u/Whitewolf699420 for template) | NaN | r/dankmemes | -banned- | 2021-01-01 11:41:46 | Negative | Negative | 1.0 | NaN | submission | 8 | fuck_amber_heard_credit_to_uwhitewolf699420_for | 7 | [] | 0 |
| 213 | t1_ghphm5v | /r/redditmoment/comments/ko1xfy/delete_tik_tok_or_slap_amber_heard/ghphm5v/ | No but really fuck Amber Heard | t3_ko1xfy | r/redditmoment | Heartstop56 | 2021-01-01 13:00:24 | Negative | Negative | -2.0 | submission | comment | 6 | delete_tik_tok_or_slap_amber_heard | 7 | [] | 0 |
| 388 | t3_kohzpz | /r/Amber_Heard/comments/kohzpz/fuck_amber_heard_all_my_homies_hate_amber_heard/ | Fuck Amber Heard, All my Homies hate Amber Heard #JusticeforJohnnyDepp | NaN | r/Amber_Heard | -banned- | 2021-01-01 20:06:51 | Negative | Negative | 1.0 | NaN | submission | 10 | Amber_Heard | 2 | [] | 0 |
| 684 | t3_kpbsbr | /r/Animemes/comments/kpbsbr/fuck_amber_heard/ | Fuck amber heard | NaN | r/Animemes | dingusbob69 | 2021-01-03 02:38:42 | Negative | Negative | 269.0 | NaN | submission | 3 | fuck_amber_heard | 3 | [] | 0 |
# Authors of the "fuck amber heard" posts; '-banned-' dominates again.
df_fuc2.author.value_counts()
-banned- 13
FutureCrusaderX 2
RangaBro 1
throwaway340315 1
NotCayde6 1
..
superzepto 1
fr0ntsight 1
Buckweed_420 1
ReallyBuffHamster 1
TitanGuppie 1
Name: author, Length: 89, dtype: int64
# Semicolon suppresses the long subreddit breakdown; then keep only the submissions subset.
df_fuc2.subreddit.value_counts();
df_fuc2_submissions = df_fuc2[df_fuc2.submission_comment == 'submission']
print(df_fuc2_submissions.shape)
df_fuc2_submissions.head()
(31, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 183 | t3_ko9ymn | /r/memes/comments/ko9ymn/fuck_amber_heard_cred... | Fuck Amber Heard (credit to u/Whitewolf699420 ... | NaN | r/memes | -banned- | 2021-01-01 11:39:40 | Negative | Negative | 1.0 | NaN | submission | 9 | fuck_amber_heard_credit_to_uwhitewolf699420_for | 7 | [] | 0 |
| 186 | t3_ko9zmj | /r/dankmemes/comments/ko9zmj/fuck_amber_heard_... | Fuck Amber Heard (credit to u/Whitewolf699420 ... | NaN | r/dankmemes | -banned- | 2021-01-01 11:41:46 | Negative | Negative | 1.0 | NaN | submission | 8 | fuck_amber_heard_credit_to_uwhitewolf699420_for | 7 | [] | 0 |
| 388 | t3_kohzpz | /r/Amber_Heard/comments/kohzpz/fuck_amber_hear... | Fuck Amber Heard, All my Homies hate Amber Hea... | NaN | r/Amber_Heard | -banned- | 2021-01-01 20:06:51 | Negative | Negative | 1.0 | NaN | submission | 10 | Amber_Heard | 2 | [] | 0 |
| 684 | t3_kpbsbr | /r/Animemes/comments/kpbsbr/fuck_amber_heard/ | Fuck amber heard | NaN | r/Animemes | dingusbob69 | 2021-01-03 02:38:42 | Negative | Negative | 269.0 | NaN | submission | 3 | fuck_amber_heard | 3 | [] | 0 |
| 889 | t3_kq9lal | /r/memes/comments/kq9lal/fuck_amber_heard/ | fuck Amber Heard | NaN | r/memes | guyinAmerica1 | 2021-01-04 14:18:40 | Negative | Negative | 1.0 | NaN | submission | 3 | fuck_amber_heard | 3 | [] | 0 |
# Among the submissions alone, '-banned-' still accounts for most posts.
df_fuc2_submissions.author.value_counts()
-banned- 12 adolfsdad 1 alexander_2084 1 ryzn02 1 TitanGuppie 1 isaac0304 1 linglingthemetronome 1 itislwiaytime 1 dingusbob69 1 big_pog_human2478 1 OrientalDude 1 EmbodimentOfUwU 1 FrontpageWatch2020 1 LimpingIceberg 1 PlatinumDisc 1 guyinAmerica1 1 Sh0uldBeDoingSchool 1 throwaway340315 1 bigsquibb 1 Flerp6969 1 Name: author, dtype: int64
text = "good bot"
# Replies acknowledging a bot ("good bot") — a sign of automated participants.
# regex=False: plain substring match; na=False: NaN text must not poison the boolean mask.
df_good = df[df.text.str.lower().str.contains(text, regex=False, na=False)]
print(df_good.shape)
with pd.option_context('display.max_colwidth', None):
    display(df_good.head())
(12, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1937 | t1_ginfp9p | /r/EntitledBitch/comments/ktkm8u/fuck_amber_heard/ginfp9p/ | Good bot | t1_ginev86 | r/EntitledBitch | chixnwafflez | 2021-01-09 12:40:12 | Positive | Positive | 6.0 | comment | comment | 2 | fuck_amber_heard | 3 | [] | 0 |
| 5216 | t1_glpyfc4 | /r/MensRights/comments/lac30r/you_saved_me_johnny_depp_thanks_supporters_for/glpyfc4/ | Good bot | t1_glpye3x | r/MensRights | Amazing-Row-5963 | 2021-02-02 11:58:38 | Positive | Positive | 1.0 | comment | comment | 2 | you_saved_me_johnny_depp_thanks_supporters_for | 8 | [] | 0 |
| 5748 | t1_gmsoxjj | /r/MensRights/comments/lgbi6k/amber_heard_reportedly_in_talks_for_star_wars_role/gmsoxjj/ | Good bot | t1_gmrdo94 | r/MensRights | Icr711 | 2021-02-10 08:15:09 | Positive | Positive | 2.0 | comment | comment | 2 | amber_heard_reportedly_in_talks_for_star_wars_role | 9 | [] | 0 |
| 5798 | t1_gmu3sk1 | /r/MensRights/comments/lgbi6k/amber_heard_reportedly_in_talks_for_star_wars_role/gmu3sk1/ | Good bot, boy! | t1_gmrdo94 | r/MensRights | loucos3 | 2021-02-10 17:10:37 | Positive | Positive | 1.0 | comment | comment | 3 | amber_heard_reportedly_in_talks_for_star_wars_role | 9 | [] | 0 |
| 8736 | t1_gp58cvf | /r/pussypassdenied/comments/lu5055/amber_heard_has_been_fired_from_jason_momoas/gp58cvf/ | Good bot | t1_gp51cwx | r/pussypassdenied | RunningBread888_yt | 2021-02-28 08:04:04 | Positive | Positive | 17.0 | comment | comment | 2 | amber_heard_has_been_fired_from_jason_momoas | 8 | [] | 0 |
text = " bot "
# Any contribution mentioning "bot" as a standalone word (padded with spaces).
# regex=False: plain substring match; na=False: NaN text must not poison the boolean mask.
df_bot = df[df.text.str.lower().str.contains(text, regex=False, na=False)]
print(df_bot.shape)
with pd.option_context('display.max_colwidth', None):
    display(df_bot.head())
(33, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 70 | t1_ghogbmf | /r/memes/comments/ko5qc9/apkngxpraw_now_theres_the_meeting_code_do_what/ghogbmf/ | Hello /u/thatgamerguy567! Unfortunately, your post has been removed. To protect the subreddit from advertising spam, ban evasions and abuse, we no longer allow new/inactive users to make submissions, you need to build up some post, comment karma and history. In the mean time, please be sure to familiarize yourself with our subreddit rules:\n \n>/r/memes/about/rules\n>\n>r/memes/about/sidebar\n\nIn order to achieve the threshold, we ask you to post to other subs around Reddit, and contribute in comments to build up your post and comment karma. You can hover over your karma on your profile to see the difference between post and comment karma, as we do **not** evaluate your overall karma. This may explain why you often see someone with "lower" overall karma able to post. *You are welcome to comment in r/memes* in order to spend some time getting to know our community, then you'll be welcome to make submissions.\n\n1\. No, the bot did not make a mistake. The bot is excellent at counting.\n\n2\. When the bot stops removing your posts, you'll know that you've finally met our requirements.\n\n3\. We will not tell you how much karma you need.\n\nThanks for understanding.\n\n---\n\n#Please do not message us about this, we need the minimum karma filter to prevent spam. Thank you\n\n---\n\n\n*I am a bot, and this action was performed automatically. Please [contact the moderators of this subreddit](/message/compose/?to=/r/memes) if you have any questions or concerns.* | t3_ko5qc9 | r/memes | AutoModerator | 2021-01-01 05:25:21 | Positive | Neutral | 1.0 | submission | comment | 239 | apkngxpraw_now_theres_the_meeting_code_do_what | 8 | [] | 0 |
| 260 | t1_ghq4xri | /r/meme/comments/kodofd/roles_perfect_for_amber_heard/ghq4xri/ | Hey!\n\nYou've submitted an uncropped image.\n\n[Click here](https://lmgtfy.com/?q=how+to+crop+an+image&s=g) to learn how to crop an image.\n\nOnce you've cropped the image properly, you may resubmit it.\n\nThanks!\n\n-----------------------\n\n*I'm a bot so if I was wrong, reply to me and a moderator will check it.* | t3_kodofd | r/meme | MAGIC_EYE_BOT | 2021-01-01 16:08:52 | Negative | Neutral | 0.0 | submission | comment | 44 | roles_perfect_for_amber_heard | 5 | ['https://lmgtfy.com'] | 1 |
| 399 | t1_ghrcepj | /r/Makeup/comments/kogfek/amber_heard_the_face_of_new_loréal_ad_after/ghrcepj/ | The Hollywood Reporter had a really interesting article on Depp. Apparently he has a very toxic fan base/lots of bot supporters. \n\nhttps://www.hollywoodreporter.com/features/hes-radioactive-inside-johnny-depps-self-made-implosion | t1_ghra7an | r/Makeup | SwimmingCoyote | 2021-01-01 20:58:43 | Positive | Positive | 25.0 | comment | comment | 22 | amber_heard_the_face_of_new_loréal_ad_after | 9 | ['https://www.hollywoodreporter.com'] | 1 |
| 723 | t1_ghxjsqp | /r/Animemes/comments/kpbsbr/fuck_amber_heard/ghxjsqp/ | This submission was removed for breaking Rule 9: Be Nice.\n\n--- \n\nI am a bot and this comment was made automatically. **However, this submission was removed manually by a member of the mod team. If you have questions or concerns about this removal, please contact the moderators through modmail.** | t3_kpbsbr | r/Animemes | AnimemesBot | 2021-01-03 10:11:12 | Positive | Positive | 1.0 | submission | comment | 49 | fuck_amber_heard | 3 | [] | 0 |
| 1064 | t1_gi619f9 | /r/memes/comments/kqvoqr/help_soldier_this_for_some_reason_has_hate/gi619f9/ | Hello /u/cat_t_cat! Unfortunately, your post has been removed. To protect the subreddit from advertising spam, ban evasions and abuse, we no longer allow new/inactive users to make submissions, you need to build up some post, comment karma and history. In the mean time, please be sure to familiarize yourself with our subreddit rules:\n \n>/r/memes/about/rules\n>\n>r/memes/about/sidebar\n\nIn order to achieve the threshold, we ask you to post to other subs around Reddit, and contribute in comments to build up your post and comment karma. You can hover over your karma on your profile to see the difference between post and comment karma, as we do **not** evaluate your overall karma. This may explain why you often see someone with "lower" overall karma able to post. *You are welcome to comment in r/memes* in order to spend some time getting to know our community, then you'll be welcome to make submissions.\n\n1\. No, the bot did not make a mistake. The bot is excellent at counting.\n\n2\. When the bot stops removing your posts, you'll know that you've finally met our requirements.\n\n3\. We will not tell you how much karma you need.\n\nThanks for understanding.\n\n---\n\n#Please do not message us about this, we need the minimum karma filter to prevent spam. Thank you\n\n---\n\n\n*I am a bot, and this action was performed automatically. Please [contact the moderators of this subreddit](/message/compose/?to=/r/memes) if you have any questions or concerns.* | t3_kqvoqr | r/memes | AutoModerator | 2021-01-05 10:18:57 | Positive | Neutral | 1.0 | submission | comment | 239 | help_soldier_this_for_some_reason_has_hate | 8 | [] | 0 |
We can further investigate the submissions
NoGoogleAMPBot
haikusbot
B0tRank
good bot
# Count the "good bot" replies per calendar date
# (df_good is presumably the good-bot subset built in an earlier cell — confirm).
df_good.created_at.dt.date.value_counts()
2021-02-28 4 2021-03-04 3 2021-02-10 2 2021-01-09 1 2021-02-02 1 2021-05-01 1 Name: created_at, dtype: int64
# Which subreddits the "good bot" replies were posted in.
df_good.subreddit.value_counts()
r/pussypassdenied 4 r/MensRights 3 r/TrueOffMyChest 3 r/EntitledBitch 1 r/awfuleverything 1 Name: subreddit, dtype: int64
# Authors of the "good bot" replies — each appears only once here.
df_good.author.value_counts()
loucos3 1 Iamabot3000 1 RunningBread888_yt 1 Amazing-Row-5963 1 chixnwafflez 1 HopelessMelancholy 1 Icr711 1 CaptainJackNarrow 1 jovinyo 1 CatchSufficient 1 apolocheese 1 aeon314159 1 Name: author, dtype: int64
text = "Amber Heard Touching Herself"
df_her = df[df.text.str.contains(text)]
# df_her = df_her.sort_values('created_at')
print(df_her.shape)
df_her.head()
(40, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 12416 | t3_mqpbo7 | /r/dekaihentai/comments/mqpbo7/amber_heard_tou... | Amber Heard Touching Herself | NaN | r/dekaihentai | owenlinx | 2021-04-14 12:17:01 | Positive | Neutral | 0.0 | NaN | submission | 4 | amber_heard_touching_herself | 4 | [] | 0 |
| 12417 | t3_mqpbp7 | /r/chastity/comments/mqpbp7/amber_heard_touchi... | Amber Heard Touching Herself | NaN | r/chastity | owenlinx | 2021-04-14 12:17:03 | Positive | Neutral | 0.0 | NaN | submission | 4 | amber_heard_touching_herself | 4 | [] | 0 |
| 12418 | t3_mqpbq4 | /r/1819club/comments/mqpbq4/amber_heard_touchi... | Amber Heard Touching Herself | NaN | r/1819club | owenlinx | 2021-04-14 12:17:06 | Positive | Neutral | 0.0 | NaN | submission | 4 | amber_heard_touching_herself | 4 | [] | 0 |
| 12419 | t3_mqpbqi | /r/Nekomimi/comments/mqpbqi/amber_heard_touchi... | Amber Heard Touching Herself | NaN | r/Nekomimi | owenlinx | 2021-04-14 12:17:06 | Positive | Neutral | 0.0 | NaN | submission | 4 | amber_heard_touching_herself | 4 | [] | 0 |
| 12420 | t3_mqpbro | /r/SideLips/comments/mqpbro/amber_heard_touchi... | Amber Heard Touching Herself | NaN | r/SideLips | owenlinx | 2021-04-14 12:17:09 | Positive | Neutral | 0.0 | NaN | submission | 4 | amber_heard_touching_herself | 4 | [] | 0 |
# Re-display with untruncated text columns for manual reading.
with pd.option_context('display.max_colwidth', None):
    display(df_her.head())
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 12416 | t3_mqpbo7 | /r/dekaihentai/comments/mqpbo7/amber_heard_touching_herself/ | Amber Heard Touching Herself | NaN | r/dekaihentai | owenlinx | 2021-04-14 12:17:01 | Positive | Neutral | 0.0 | NaN | submission | 4 | amber_heard_touching_herself | 4 | [] | 0 |
| 12417 | t3_mqpbp7 | /r/chastity/comments/mqpbp7/amber_heard_touching_herself/ | Amber Heard Touching Herself | NaN | r/chastity | owenlinx | 2021-04-14 12:17:03 | Positive | Neutral | 0.0 | NaN | submission | 4 | amber_heard_touching_herself | 4 | [] | 0 |
| 12418 | t3_mqpbq4 | /r/1819club/comments/mqpbq4/amber_heard_touching_herself/ | Amber Heard Touching Herself | NaN | r/1819club | owenlinx | 2021-04-14 12:17:06 | Positive | Neutral | 0.0 | NaN | submission | 4 | amber_heard_touching_herself | 4 | [] | 0 |
| 12419 | t3_mqpbqi | /r/Nekomimi/comments/mqpbqi/amber_heard_touching_herself/ | Amber Heard Touching Herself | NaN | r/Nekomimi | owenlinx | 2021-04-14 12:17:06 | Positive | Neutral | 0.0 | NaN | submission | 4 | amber_heard_touching_herself | 4 | [] | 0 |
| 12420 | t3_mqpbro | /r/SideLips/comments/mqpbro/amber_heard_touching_herself/ | Amber Heard Touching Herself | NaN | r/SideLips | owenlinx | 2021-04-14 12:17:09 | Positive | Neutral | 0.0 | NaN | submission | 4 | amber_heard_touching_herself | 4 | [] | 0 |
Note: the behaviour of the users "owenlinx" & "garretdanielkelly" is suspicious — they are posting in different subreddits within the same second!!
# Drop the last ROW (the banned account) to check bot behaviour on the same
# date. NOTE: iloc[:-1, :] drops the final row, not a column.
df_bot = df_her.iloc[:-1, :]
df_bot.author.value_counts()
owenlinx 20 90police 17 garretdanielkelly 2 Name: author, dtype: int64
# All remaining rows are submissions (no comments) — see the output below.
df_bot.submission_comment.value_counts()
submission 39 Name: submission_comment, dtype: int64
# Aggregate the suspected-bot posts per calendar day and plot the counts.
daily_counts = df_bot.groupby(df_bot.created_at.dt.date).size()
df_bot_contributions = daily_counts.reset_index(name='n_contributions')
fig = px.bar(
    df_bot_contributions,
    x='created_at',
    y='n_contributions',
    title='The number of "Amber Heard Touching Herself" contributions in 2021',
)
fig.update_traces(marker_color='red', marker_line_width=2, opacity=1, textposition='auto')
fig.show()
# Aggregate the suspected-bot posts by time-of-day to expose burst posting.
per_time = df_bot.groupby(df_bot.created_at.dt.time).size()
df_bot_contributions = per_time.reset_index(name='n_contributions')
fig = px.bar(
    df_bot_contributions,
    x='created_at',
    y='n_contributions',
    title='The number of "Amber Heard Touching Herself" contributions in 2021',
)
fig.update_traces(marker_color='red', marker_line_width=2, opacity=1, textposition='auto')
fig.show()
# Check whether these three suspected accounts have any other 2021 activity
# beyond the title-spam submissions found above.
df_bots = df[df['author'].isin(['owenlinx', '90police', 'garretdanielkelly'])]
print(df_bots.shape)
df_bots.head()
(39, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 12416 | t3_mqpbo7 | /r/dekaihentai/comments/mqpbo7/amber_heard_tou... | Amber Heard Touching Herself | NaN | r/dekaihentai | owenlinx | 2021-04-14 12:17:01 | Positive | Neutral | 0.0 | NaN | submission | 4 | amber_heard_touching_herself | 4 | [] | 0 |
| 12417 | t3_mqpbp7 | /r/chastity/comments/mqpbp7/amber_heard_touchi... | Amber Heard Touching Herself | NaN | r/chastity | owenlinx | 2021-04-14 12:17:03 | Positive | Neutral | 0.0 | NaN | submission | 4 | amber_heard_touching_herself | 4 | [] | 0 |
| 12418 | t3_mqpbq4 | /r/1819club/comments/mqpbq4/amber_heard_touchi... | Amber Heard Touching Herself | NaN | r/1819club | owenlinx | 2021-04-14 12:17:06 | Positive | Neutral | 0.0 | NaN | submission | 4 | amber_heard_touching_herself | 4 | [] | 0 |
| 12419 | t3_mqpbqi | /r/Nekomimi/comments/mqpbqi/amber_heard_touchi... | Amber Heard Touching Herself | NaN | r/Nekomimi | owenlinx | 2021-04-14 12:17:06 | Positive | Neutral | 0.0 | NaN | submission | 4 | amber_heard_touching_herself | 4 | [] | 0 |
| 12420 | t3_mqpbro | /r/SideLips/comments/mqpbro/amber_heard_touchi... | Amber Heard Touching Herself | NaN | r/SideLips | owenlinx | 2021-04-14 12:17:09 | Positive | Neutral | 0.0 | NaN | submission | 4 | amber_heard_touching_herself | 4 | [] | 0 |
No other activities in 2021
# Look up account metadata (verification, karma, creation date) for the
# three suspected users.
df_users.query(" user_name in ['owenlinx', '90police', 'garretdanielkelly']")
| user_name | has_verified_email | is_mod | is_gold | is_banned | comment_karma | link_karma | user_created_at | banned_unverified | creation_year | |
|---|---|---|---|---|---|---|---|---|---|---|
| 26514 | garretdanielkelly | False | False | False | False | 1061.0 | 1.0 | 2016-12-12 03:40:01 | unverified | others |
| 43427 | owenlinx | False | True | False | False | 11983.0 | 5153.0 | 2019-01-01 09:15:33 | unverified | 2019 |
| 70432 | 90police | True | True | True | True | NaN | NaN | NaT | banned | banned |
Note: neither "owenlinx" nor "garretdanielkelly" has a verified email address
text = "Amber Heard in Aquaman 2, why?"
df_aqua = df[df.text.str.contains(text)]
df_aqua = df_aqua.sort_values('created_at')
print(df_aqua.shape)
df_aqua.head()
# .style.applymap(lambda x: helpers.coloring(x, {'Positive': '#deebce', 'Negative': '#edbcbb'}),
# subset=['sentiment_blob', 'sentiment_nltk'])
(13, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4677 | t1_glcu8wu | /r/pussypassdenied/comments/k09s59/the_petitio... | Here's what I did, repost from another subredd... | t3_k09s59 | r/pussypassdenied | Lecrapface | 2021-01-30 10:40:26 | Negative | Neutral | 1.0 | submission | comment | 140 | the_petition_to_fire_amber_heard_from_aquaman_2 | 9 | [] | 0 |
| 4687 | t1_glcvob6 | /r/DC_Movies/comments/k4s16h/amber_heard_aquam... | This is what I did\n\n Well I'm going to start... | t3_k4s16h | r/DC_Movies | Lecrapface | 2021-01-30 11:03:11 | Negative | Neutral | 1.0 | submission | comment | 137 | DC_Movies | 2 | [] | 0 |
| 4689 | t1_glcvp7g | /r/CosmicBookNews/comments/k4s159/amber_heard_... | Well I'm going to start doing preliminary emai... | t3_k4s159 | r/CosmicBookNews | Lecrapface | 2021-01-30 11:03:34 | Negative | Neutral | 1.0 | submission | comment | 132 | amber_heard_aquaman_2_petition_over_16_million | 8 | [] | 0 |
| 4690 | t1_glcvrnt | /r/boxoffice/comments/k336a3/petition_to_axe_a... | Well I'm going to start doing preliminary emai... | t3_k336a3 | r/boxoffice | Lecrapface | 2021-01-30 11:04:39 | Negative | Neutral | 1.0 | submission | comment | 132 | petition_to_axe_amber_heard_from_aquaman_2 | 8 | [] | 0 |
| 4691 | t1_glcvsxj | /r/DC_Cinematic/comments/k336ef/article_petiti... | Well I'm going to start doing preliminary emai... | t3_k336ef | r/DC_Cinematic | Lecrapface | 2021-01-30 11:05:12 | Negative | Neutral | 1.0 | submission | comment | 132 | DC_Cinematic | 2 | [] | 0 |
NOTE: All of these texts are negative in sentiment_blob & neutral in sentiment_nltk!!
# Show the full comment text (untruncated columns) for manual reading.
with pd.option_context('display.max_colwidth', None):
    display(df_aqua.head())
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4677 | t1_glcu8wu | /r/pussypassdenied/comments/k09s59/the_petition_to_fire_amber_heard_from_aquaman_2/glcu8wu/ | Here's what I did, repost from another subreddit\n\nWell I'm going to start doing preliminary emails to voice my displeasure that a DV perp is even coming back to make more millions.\n\nAnd I won't be watching that shit or buying it either\n\nEdit: ok so this is who I emailed\n\njessica.zacholl@warnerbros.com\n\nShe's in WB's Media and Press Dept.\n\nAnd this is what I said \n\nSubject: Amber Heard in Aquaman 2, why?\n\nJust curious why a studio such as WB would employ a domestic violence perpetrator for their big release? I've been told Hollywood is progressive, but apparently you can abuse your spouse and that's ok? Well I will not be seeing this movie. I get so tired of the talking about how domestic violence is bad, but it seems all of the movie industry just sits silent. #MeToo indeed. | t3_k09s59 | r/pussypassdenied | Lecrapface | 2021-01-30 10:40:26 | Negative | Neutral | 1.0 | submission | comment | 140 | the_petition_to_fire_amber_heard_from_aquaman_2 | 9 | [] | 0 |
| 4687 | t1_glcvob6 | /r/DC_Movies/comments/k4s16h/amber_heard_aquaman_2_petition_over_16_million/glcvob6/ | This is what I did\n\n Well I'm going to start doing preliminary emails to voice my displeasure that a DV perp is even coming back to make more millions.\n\nAnd I won't be watching that shit or buying it either\n\nEdit: ok so this is who I emailed\n\njessica.zacholl@warnerbros.com\n\nShe's in WB's Media and Press Dept.\n\nAnd this is what I said \n\nSubject: Amber Heard in Aquaman 2, why?\n\nJust curious why a studio such as WB would employ a domestic violence perpetrator for their big release? I've been told Hollywood is progressive, but apparently you can abuse your spouse and that's ok? Well I will not be seeing this movie. I get so tired of the talking about how domestic violence is bad, but it seems all of the movie industry just sits silent. #MeToo indeed. | t3_k4s16h | r/DC_Movies | Lecrapface | 2021-01-30 11:03:11 | Negative | Neutral | 1.0 | submission | comment | 137 | DC_Movies | 2 | [] | 0 |
| 4689 | t1_glcvp7g | /r/CosmicBookNews/comments/k4s159/amber_heard_aquaman_2_petition_over_16_million/glcvp7g/ | Well I'm going to start doing preliminary emails to voice my displeasure that a DV perp is even coming back to make more millions.\n\nAnd I won't be watching that shit or buying it either\n\nEdit: ok so this is who I emailed\n\njessica.zacholl@warnerbros.com\n\nShe's in WB's Media and Press Dept.\n\nAnd this is what I said \n\nSubject: Amber Heard in Aquaman 2, why?\n\nJust curious why a studio such as WB would employ a domestic violence perpetrator for their big release? I've been told Hollywood is progressive, but apparently you can abuse your spouse and that's ok? Well I will not be seeing this movie. I get so tired of the talking about how domestic violence is bad, but it seems all of the movie industry just sits silent. #MeToo indeed. | t3_k4s159 | r/CosmicBookNews | Lecrapface | 2021-01-30 11:03:34 | Negative | Neutral | 1.0 | submission | comment | 132 | amber_heard_aquaman_2_petition_over_16_million | 8 | [] | 0 |
| 4690 | t1_glcvrnt | /r/boxoffice/comments/k336a3/petition_to_axe_amber_heard_from_aquaman_2/glcvrnt/ | Well I'm going to start doing preliminary emails to voice my displeasure that a DV perp is even coming back to make more millions.\n\nAnd I won't be watching that shit or buying it either\n\nEdit: ok so this is who I emailed\n\njessica.zacholl@warnerbros.com\n\nShe's in WB's Media and Press Dept.\n\nAnd this is what I said \n\nSubject: Amber Heard in Aquaman 2, why?\n\nJust curious why a studio such as WB would employ a domestic violence perpetrator for their big release? I've been told Hollywood is progressive, but apparently you can abuse your spouse and that's ok? Well I will not be seeing this movie. I get so tired of the talking about how domestic violence is bad, but it seems all of the movie industry just sits silent. #MeToo indeed. | t3_k336a3 | r/boxoffice | Lecrapface | 2021-01-30 11:04:39 | Negative | Neutral | 1.0 | submission | comment | 132 | petition_to_axe_amber_heard_from_aquaman_2 | 8 | [] | 0 |
| 4691 | t1_glcvsxj | /r/DC_Cinematic/comments/k336ef/article_petition_to_axe_amber_heard_from_aquaman/glcvsxj/ | Well I'm going to start doing preliminary emails to voice my displeasure that a DV perp is even coming back to make more millions.\n\nAnd I won't be watching that shit or buying it either\n\nEdit: ok so this is who I emailed\n\njessica.zacholl@warnerbros.com\n\nShe's in WB's Media and Press Dept.\n\nAnd this is what I said \n\nSubject: Amber Heard in Aquaman 2, why?\n\nJust curious why a studio such as WB would employ a domestic violence perpetrator for their big release? I've been told Hollywood is progressive, but apparently you can abuse your spouse and that's ok? Well I will not be seeing this movie. I get so tired of the talking about how domestic violence is bad, but it seems all of the movie industry just sits silent. #MeToo indeed. | t3_k336ef | r/DC_Cinematic | Lecrapface | 2021-01-30 11:05:12 | Negative | Neutral | 1.0 | submission | comment | 132 | DC_Cinematic | 2 | [] | 0 |
# A single author (Lecrapface) wrote all of these comments.
df_aqua.author.value_counts()
Lecrapface 13 Name: author, dtype: int64
# The identical comment was spread across 13 different subreddits.
df_aqua.subreddit.value_counts()
r/PeopleBeingJerks 1 r/SupportForTheAccused 1 r/uwaterloo 1 r/MensRights 1 r/boxoffice 1 r/PussyPass 1 r/DC_Cinematic 1 r/entertainment 1 r/pussypassdenied 1 r/DC_Movies 1 r/JusticeForJohnnyDepp 1 r/CosmicBookNews 1 r/DeFranco 1 Name: subreddit, dtype: int64
# top_level == 'submission' for all rows: every comment's parent is the
# submission itself (i.e. top-level comments, not replies).
df_aqua.top_level.value_counts()
submission 13 Name: top_level, dtype: int64
This means they are all top-level comments made directly on submissions (not replies)!!
# All 13 rows are comments (not submissions).
df_aqua.submission_comment.value_counts()
comment 13 Name: submission_comment, dtype: int64
# Check the time frame in which these comments were written: date span
# between the last and first comment (df_aqua is sorted by created_at).
df_aqua.created_at.dt.date.values[-1] - df_aqua.created_at.dt.date.values[0]
datetime.timedelta(0)
# Check the time frame in which these comments were written: hour-of-day
# difference between the last and first comment.
df_aqua.created_at.dt.hour.values[-1] - df_aqua.created_at.dt.hour.values[0]
1
NOTE: The same user, Lecrapface, posted the same comment in 13 different subreddits on the same date, within the same hour.
# Check this user's account data (verification, karma, creation date).
df_users.query(" user_name == 'Lecrapface' ")
| user_name | has_verified_email | is_mod | is_gold | is_banned | comment_karma | link_karma | user_created_at | banned_unverified | creation_year | |
|---|---|---|---|---|---|---|---|---|---|---|
| 63991 | Lecrapface | True | False | False | False | 9790.0 | 7737.0 | 2020-10-25 13:24:13 | others | 2020 |
# Daily counts of Lecrapface's copy-pasted comment.
per_day = df_aqua.groupby(df_aqua.created_at.dt.date).size()
df_aqua_contributions = per_day.reset_index(name='n_contributions')
fig = px.bar(
    df_aqua_contributions,
    x='created_at',
    y='n_contributions',
    title='The number of "Amber Heard in Aquaman 2, why?" contributions by Lecrapface in 2021',
)
fig.update_traces(marker_color='red', marker_line_width=2, opacity=1, textposition='auto')
fig.show()
# Hourly counts of Lecrapface's copy-pasted comment.
per_hour = df_aqua.groupby(df_aqua.created_at.dt.hour).size()
df_aqua_contributions = per_hour.reset_index(name='n_contributions')
fig = px.bar(
    df_aqua_contributions,
    x='created_at',
    y='n_contributions',
    title='The number of "Amber Heard in Aquaman 2, why?" contributions by Lecrapface in 2021',
)
fig.update_traces(marker_color='red', marker_line_width=2, opacity=1, textposition='auto')
fig.show()
# Per-second timestamps of Lecrapface's copy-pasted comment.
per_second = df_aqua.groupby(df_aqua.created_at.dt.strftime("%H:%M:%S")).size()
df_aqua_contributions = per_second.reset_index(name='n_contributions')
fig = px.bar(
    df_aqua_contributions,
    x='created_at',
    y='n_contributions',
    title='The number of "Amber Heard in Aquaman 2, why?" contributions by Lecrapface in 2021',
)
fig.update_traces(marker_color='red', marker_line_width=2, opacity=1, textposition='auto')
fig.show()
# Record Lecrapface in the running dictionary of suspected bot accounts
# (suspected_dict is built up across earlier cells).
suspected_dict['Lecrapface'] = "Created at: 2020-10-25 posted 'Amber Heard in Aquaman 2, why?' on 13 different subreddits in Jan 30, 2021"
# Typo fix in the printed label: "Suspeted" -> "Suspected".
print(f'Suspected Dictionary: {suspected_dict}')
Suspeted Dictionary: {'Jreal22': 'Created at:2019-06-23, used the word f*ck in 28 negative comments in APR 17,18 2021', 'Lecrapface': "Created at: 2020-10-25 posted 'Amber Heard in Aquaman 2, why?' on 13 different subreddits in Jan 30, 2021"}
# All 2021 contributions by the suspected account, oldest first.
df_lecrapface = (
    df.query(" author == 'Lecrapface'")
    .sort_values('created_at')
)
print(df_lecrapface.shape)
df_lecrapface.head(2)
(13, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 4677 | t1_glcu8wu | /r/pussypassdenied/comments/k09s59/the_petitio... | Here's what I did, repost from another subredd... | t3_k09s59 | r/pussypassdenied | Lecrapface | 2021-01-30 10:40:26 | Negative | Neutral | 1.0 | submission | comment | 140 | the_petition_to_fire_amber_heard_from_aquaman_2 | 9 | [] | 0 |
| 4687 | t1_glcvob6 | /r/DC_Movies/comments/k4s16h/amber_heard_aquam... | This is what I did\n\n Well I'm going to start... | t3_k4s16h | r/DC_Movies | Lecrapface | 2021-01-30 11:03:11 | Negative | Neutral | 1.0 | submission | comment | 137 | DC_Movies | 2 | [] | 0 |
The user Lecrapface has no other contributions in 2021 except for these 13 comments against AH regarding Aquaman!!
Check the number of words per contribution
Naturally, shorter texts are easier for bots to generate
# NOTE(review): the first statement's result is discarded (the trailing ";"
# suppresses notebook output), so it is effectively a no-op here.
df['text_words'].value_counts().head(10);
# Distribution of words-per-contribution; short texts are easier to automate.
px.histogram(df['text_words'].to_frame(), x="text_words",title='number of words in each contribution',
             nbins=200).update_traces(marker_color='#5296dd')
The number of parent comments on submissions¶
# Bar chart of contribution parent categories (comment vs submission).
# Typo fix in the chart title: "Contrbution" -> "Contribution".
px.bar(
    data_frame=df['top_level'].value_counts().to_frame().reset_index(),
    x="index",
    y="top_level",
).update_layout(
    title='Comment or Submission (Top Level of Contribution "Parent")',
    xaxis_title='contribution top level (parent) category',
    yaxis_title='number of contributions',
).update_traces(marker_color='#5296dd')
This means that we have about 7K parent comments on submissions (not replies).
Investigating the Submission Text
(Submissions with the most comments and replies)
We can get the number of different submissions by looking only at the submissions dataframe
Also we can look at submission_text with the most interactions (repeated submission_text)
# The 10 most-repeated submission_text values (most-interacted submissions,
# or distinct submissions sharing a title — cannot distinguish here).
df['submission_text'].value_counts().head(10)
johnny_depp_releases_lapd_bodycam_footage_proving 1169 amber_heard 1068 amber_heard_has_been_fired_from_jason_momoas 933 emilia_clarke_to_replace_amber_heard_as_mera_in 739 DC_Cinematic 707 its_disgusting_that_people_are_less_angry_about 686 amber_heard_is_a_monster_a_gold_digger_looking 609 amber_heard_still_in_aquaman_2_despite_proof_that 481 fuck_amber_heard 376 does_the_amber_heard_story_prove_that_men_will 358 Name: submission_text, dtype: int64
This could be either submissions with more comments and replies, or different submissions with the same text, we cannot know for sure.
# Isolate the actual SUBMISSIONS (not comments) carrying this title, to see
# how many distinct posts share it.
df_mera = df.query(" submission_text == 'emilia_clarke_to_replace_amber_heard_as_mera_in' & \
                     submission_comment == 'submission' ")
print(df_mera.shape)
with pd.option_context('display.max_colwidth', None):
    display(df_mera.head())
(6, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 6601 | t3_lo748q | /r/entertainment/comments/lo748q/emilia_clarke_to_replace_amber_heard_as_mera_in/ | Emilia Clarke to Replace Amber Heard As Mera In Aquaman 2? - FandomWire | NaN | r/entertainment | UltraMagnus0001 | 2021-02-20 13:27:20 | Neutral | Neutral | 17412.0 | NaN | submission | 13 | emilia_clarke_to_replace_amber_heard_as_mera_in | 9 | [] | 0 |
| 7459 | t3_lon83n | /r/EmiliaClarke/comments/lon83n/emilia_clarke_to_replace_amber_heard_as_mera_in/ | Emilia Clarke to Replace Amber Heard As Mera In Aquaman 2? | NaN | r/EmiliaClarke | -banned- | 2021-02-21 02:47:15 | Neutral | Neutral | 1.0 | NaN | submission | 11 | emilia_clarke_to_replace_amber_heard_as_mera_in | 9 | [] | 0 |
| 7568 | t3_lovxnz | /r/movies/comments/lovxnz/emilia_clarke_to_replace_amber_heard_as_mera_in/ | Emilia Clarke to Replace Amber Heard As Mera In Aquaman 2? - FandomWire | NaN | r/movies | -banned- | 2021-02-21 12:15:57 | Neutral | Neutral | 9.0 | NaN | submission | 13 | emilia_clarke_to_replace_amber_heard_as_mera_in | 9 | [] | 0 |
| 7579 | t3_low6va | /r/movies/comments/low6va/emilia_clarke_to_replace_amber_heard_as_mera_in/ | Emilia Clarke to Replace Amber Heard As Mera In Aquaman 2? | NaN | r/movies | Watcher2020 | 2021-02-21 12:32:33 | Neutral | Neutral | 21.0 | NaN | submission | 11 | emilia_clarke_to_replace_amber_heard_as_mera_in | 9 | [] | 0 |
| 7715 | t3_lp6ttu | /r/MensRights/comments/lp6ttu/emilia_clarke_to_replace_amber_heard_as_mera_in/ | Emilia Clarke to Replace Amber Heard As Mera In Aquaman 2? | NaN | r/MensRights | UnHope20 | 2021-02-21 21:15:31 | Neutral | Neutral | 132.0 | NaN | submission | 11 | emilia_clarke_to_replace_amber_heard_as_mera_in | 9 | [] | 0 |
6 Different Submissions
https://www.reddit.com/r/MensRights/comments/lp6ttu/emilia_clarke_to_replace_amber_heard_as_mera_in/
Note: this submission was deleted (the account has an unverified email)
# Authors of the six submissions, minus the placeholder for deleted accounts.
authors = df_mera.author.unique().tolist()
authors.remove('-banned-')
df_users[df_users.user_name.isin(authors)]
| user_name | has_verified_email | is_mod | is_gold | is_banned | comment_karma | link_karma | user_created_at | banned_unverified | creation_year | |
|---|---|---|---|---|---|---|---|---|---|---|
| 4654 | UltraMagnus0001 | True | False | False | False | 51696.0 | 18214.0 | 2012-01-25 22:06:22 | others | others |
| 47731 | Watcher2020 | True | False | False | False | 123.0 | 56960.0 | 2019-04-28 20:55:03 | others | 2019 |
| 60311 | UnHope20 | False | True | False | False | 4497.0 | 8588.0 | 2020-06-01 20:46:13 | unverified | 2020 |
# All contributions (submissions AND comments) under this submission title.
df_mera_comments = df.query(" submission_text == 'emilia_clarke_to_replace_amber_heard_as_mera_in' ")
print(df_mera_comments.shape)
df_mera_comments.head(1)
(739, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 6601 | t3_lo748q | /r/entertainment/comments/lo748q/emilia_clarke... | Emilia Clarke to Replace Amber Heard As Mera I... | NaN | r/entertainment | UltraMagnus0001 | 2021-02-20 13:27:20 | Neutral | Neutral | 17412.0 | NaN | submission | 13 | emilia_clarke_to_replace_amber_heard_as_mera_in | 9 | [] | 0 |
# Contributions per day across the "mera" submissions, first week shown.
df_mera_contributions = (
    df_mera_comments.groupby(df_mera_comments.created_at.dt.date)
    .size()
    .reset_index(name='n_contributions')
)
first_week = df_mera_contributions.head(7)
fig = px.bar(
    first_week,
    x='created_at',
    y='n_contributions',
    title='The number of contributions/date on these submissions',
)
fig.update_layout(
    xaxis=dict(
        title='Contribution Date',
        tickmode='array',
        tickvals=first_week.created_at,
    )
)
# Highlight unusually busy days (>200 contributions) in red.
clrs = ['red' if (y > 200) else '#5296dd' for y in df_mera_contributions.n_contributions]
fig.update_traces(marker_color=clrs, marker_line_width=1.5, opacity=1, textposition='auto')
fig.show()
# Contributions per author on these submissions.
df_mera_authors = (
    df_mera_comments.groupby(df_mera_comments.author)
    .size()
    .reset_index(name='n_contributions')
)
fig = px.bar(
    df_mera_authors,
    x='author',
    y='n_contributions',
    title='The number of contributions per author on these submissions',
)
fig.update_traces(marker_color='#5296dd', opacity=1, textposition='auto')
fig.update_yaxes(range=[0, 25])
fig.show()
# NOTE(review): result is discarded (trailing ";"), effectively a no-op —
# presumably a leftover spot-check of one author's comments.
df_mera_comments[df_mera_comments.author == 'someonesgranpa'];
# Same cross-posting check for the "fired from Jason Momoa's Aquaman 2"
# title: isolate the distinct submissions sharing it.
df_momos = df.query(" submission_text == 'amber_heard_has_been_fired_from_jason_momoas' & \
                      submission_comment == 'submission' ")
print(df_momos.shape)
with pd.option_context('display.max_colwidth', None):
    display(df_momos.head())
(4, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 8565 | t3_lu1659 | /r/byebyejob/comments/lu1659/amber_heard_has_been_fired_from_jason_momoas/ | Amber Heard Has Been Fired From Jason Momoa's "Aquaman 2" | NaN | r/byebyejob | ToyaW31 | 2021-02-28 00:31:48 | Neutral | Negative | 1.0 | NaN | submission | 10 | amber_heard_has_been_fired_from_jason_momoas | 8 | [] | 0 |
| 8585 | t3_lu5055 | /r/pussypassdenied/comments/lu5055/amber_heard_has_been_fired_from_jason_momoas/ | Amber Heard Has Been Fired From Jason Momoa's "Aquaman 2" | NaN | r/pussypassdenied | MahatmaGandamnit | 2021-02-28 03:51:34 | Neutral | Negative | 29511.0 | NaN | submission | 10 | amber_heard_has_been_fired_from_jason_momoas | 8 | [] | 0 |
| 8588 | t3_lu587o | /r/byebyejob/comments/lu587o/amber_heard_has_been_fired_from_jason_momoas/ | Amber Heard Has Been Fired From Jason Momoa's "Aquaman 2" | NaN | r/byebyejob | Weezy-NJPW_Fan | 2021-02-28 03:59:39 | Neutral | Negative | 1.0 | NaN | submission | 10 | amber_heard_has_been_fired_from_jason_momoas | 8 | [] | 0 |
| 9586 | t3_lwihhx | /r/MensRights/comments/lwihhx/amber_heard_has_been_fired_from_jason_momoas/ | Amber Heard Has Been Fired From Jason Momoa's "Aquaman 2": Report | NaN | r/MensRights | PeonSupreme | 2021-03-03 02:02:38 | Neutral | Negative | 1.0 | NaN | submission | 11 | amber_heard_has_been_fired_from_jason_momoas | 8 | [] | 0 |
4 Different Submissions
The 3 other posts were deleted except for this one
# Distinct submitters of the Momoa story, looked up in the users dataset.
li = df_momos.author.unique().tolist()
df_users[df_users.user_name.isin(li)]
| user_name | has_verified_email | is_mod | is_gold | is_banned | comment_karma | link_karma | user_created_at | banned_unverified | creation_year | |
|---|---|---|---|---|---|---|---|---|---|---|
| 25395 | MahatmaGandamnit | True | False | False | False | 7219.0 | 10995.0 | 2016-10-10 00:01:25 | others | others |
| 34393 | PeonSupreme | True | True | False | False | 39131.0 | 137125.0 | 2018-01-24 19:00:30 | others | 2018 |
| 49236 | ToyaW31 | True | False | False | False | 1133.0 | 2899.0 | 2019-06-14 23:05:24 | others | 2019 |
| 61898 | Weezy-NJPW_Fan | True | False | True | False | 911523.0 | 1940933.0 | 2020-07-31 02:27:31 | others | 2020 |
# All contributions (submissions AND comments) attached to the Momoa story slug.
df_momos_comments = df.query(" submission_text == 'amber_heard_has_been_fired_from_jason_momoas' ")
print(df_momos_comments.shape)
df_momos_comments.head(1)
(933, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 8565 | t3_lu1659 | /r/byebyejob/comments/lu1659/amber_heard_has_b... | Amber Heard Has Been Fired From Jason Momoa's ... | NaN | r/byebyejob | ToyaW31 | 2021-02-28 00:31:48 | Neutral | Negative | 1.0 | NaN | submission | 10 | amber_heard_has_been_fired_from_jason_momoas | 8 | [] | 0 |
# Daily contribution volume on the Momoa submissions.
df_momos_contributions = (df_momos_comments
                          .groupby(df_momos_comments.created_at.dt.date)
                          .size()
                          .reset_index(name='n_contributions'))
first_days = df_momos_contributions.head(4)
fig = px.bar(first_days, x='created_at', y='n_contributions',
             title='The number of contributions/date on these submissions')
fig.update_layout(xaxis=dict(title='Contribution Date',
                             tickmode='array',
                             tickvals=first_days.created_at))
# Spike days (> 200 contributions) drawn in red.
clrs = ['red' if n > 200 else '#5296dd' for n in df_momos_contributions.n_contributions]
fig.update_traces(marker_color=clrs, marker_line_width=1.5, opacity=1, textposition='auto')
fig.show()
# Per-author counts on the same submissions.
df_momos_authors = (df_momos_comments
                    .groupby(df_momos_comments.author)
                    .size()
                    .reset_index(name='n_contributions'))
fig = px.bar(df_momos_authors, x='author', y='n_contributions',
             title='The number of contributions per author on these submissions')
fig.update_traces(marker_color='#5296dd', opacity=1, textposition='auto')
fig.update_yaxes(range=[0, 25])
fig.show()
# Semicolon suppresses the notebook echo.
df_momos_comments[df_momos_comments.author == 'JenkinMan'];
# All negative comments
# Inspect JenkinMan's comments on this story, then pull the account record.
df_momos_comments[df_momos_comments.author == 'JenkinMan']
df_users.query(" user_name in ['JenkinMan']")
| user_name | has_verified_email | is_mod | is_gold | is_banned | comment_karma | link_karma | user_created_at | banned_unverified | creation_year | |
|---|---|---|---|---|---|---|---|---|---|---|
| 50564 | JenkinMan | True | True | False | False | 4799.0 | 793.0 | 2019-07-26 18:53:47 | others | 2019 |
# Submissions only, for the "LAPD bodycam footage" story slug.
df_lapd = df.query(" submission_text == 'johnny_depp_releases_lapd_bodycam_footage_proving' & \
submission_comment == 'submission' ")
print(df_lapd.shape)
# Show full-width text columns while displaying.
with pd.option_context('display.max_colwidth', None):
    display(df_lapd.head(1))
(3, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 12654 | t3_msgoz8 | /r/entertainment/comments/msgoz8/johnny_depp_releases_lapd_bodycam_footage_proving/ | Johnny Depp releases LAPD bodycam footage proving Amber Heard lied about the fight that ended their marriage | NaN | r/entertainment | Gato1980 | 2021-04-17 00:54:16 | Neutral | Negative | 15660.0 | NaN | submission | 17 | johnny_depp_releases_lapd_bodycam_footage_proving | 7 | [] | 0 |
Three different submissions
# Account records for the three authors who posted this story.
df_users.query(" user_name in ['Gato1980', 'autotldr', 'Danger3214']")
| user_name | has_verified_email | is_mod | is_gold | is_banned | comment_karma | link_karma | user_created_at | banned_unverified | creation_year | |
|---|---|---|---|---|---|---|---|---|---|---|
| 4713 | autotldr | True | True | False | False | 2752068.0 | 286509.0 | 2012-01-30 16:39:40 | others | others |
| 25954 | Gato1980 | True | False | True | False | 440406.0 | 1591399.0 | 2016-11-12 21:45:46 | others | others |
| 70420 | Danger3214 | True | True | True | True | NaN | NaN | NaT | banned | banned |
# All contributions (submissions and comments) on the LAPD-footage story.
df_lapd_comments = df.query(" submission_text == 'johnny_depp_releases_lapd_bodycam_footage_proving' ")
print(df_lapd_comments.shape)
df_lapd_comments.head()
(1169, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 12654 | t3_msgoz8 | /r/entertainment/comments/msgoz8/johnny_depp_r... | Johnny Depp releases LAPD bodycam footage prov... | NaN | r/entertainment | Gato1980 | 2021-04-17 00:54:16 | Neutral | Negative | 15660.0 | NaN | submission | 17 | johnny_depp_releases_lapd_bodycam_footage_proving | 7 | [] | 0 |
| 12655 | t1_gusgmjz | /r/entertainment/comments/msgoz8/johnny_depp_r... | [deleted] | t3_msgoz8 | r/entertainment | -banned- | 2021-04-17 00:56:13 | Neutral | Neutral | -14.0 | submission | comment | 1 | johnny_depp_releases_lapd_bodycam_footage_proving | 7 | [] | 0 |
| 12656 | t3_msgtce | /r/autotldr/comments/msgtce/johnny_depp_releas... | Johnny Depp releases LAPD bodycam footage prov... | NaN | r/autotldr | autotldr | 2021-04-17 01:01:41 | Neutral | Negative | 65.0 | NaN | submission | 17 | johnny_depp_releases_lapd_bodycam_footage_proving | 7 | [] | 0 |
| 12658 | t1_gusigwx | /r/entertainment/comments/msgoz8/johnny_depp_r... | [deleted] | t3_msgoz8 | r/entertainment | -banned- | 2021-04-17 01:13:03 | Neutral | Neutral | -70.0 | submission | comment | 1 | johnny_depp_releases_lapd_bodycam_footage_proving | 7 | [] | 0 |
| 12659 | t1_guslddq | /r/entertainment/comments/msgoz8/johnny_depp_r... | Who cares about his career. Let the truth come... | t1_gusgmjz | r/entertainment | Temporary_Draw_4708 | 2021-04-17 01:39:22 | Neutral | Positive | 13.0 | comment | comment | 10 | johnny_depp_releases_lapd_bodycam_footage_proving | 7 | [] | 0 |
# Daily contribution volume on the LAPD-footage submissions.
df_lapd_contributions = (df_lapd_comments
                         .groupby(df_lapd_comments.created_at.dt.date)
                         .size()
                         .reset_index(name='n_contributions'))
first_days = df_lapd_contributions.head()
fig = px.bar(first_days, x='created_at', y='n_contributions',
             title='The number of contributions/date on these submissions')
fig.update_layout(xaxis=dict(title='Contribution Date',
                             tickmode='array',
                             tickvals=first_days.created_at))
# Spike days (> 200 contributions) in red.
clrs = ['red' if n > 200 else '#5296dd' for n in df_lapd_contributions.n_contributions]
fig.update_traces(marker_color=clrs, marker_line_width=1.5, opacity=1, textposition='auto')
fig.show()
# Per-author counts on the same submissions.
df_lapd_authors = (df_lapd_comments
                   .groupby(df_lapd_comments.author)
                   .size()
                   .reset_index(name='n_contributions'))
fig = px.bar(df_lapd_authors, x='author', y='n_contributions',
             title='The number of contributions per author on these submissions')
fig.update_traces(marker_color='#5296dd', opacity=1, textposition='auto')
fig.update_yaxes(range=[0, 25])
fig.show()
Investigating the submissions with the most comments
(Top Level Comments)
# Top 5 parent submissions ranked by number of top-level comments.
df.parent_id.value_counts().head()
t3_lu5055 318 t3_lo748q 233 t3_n9lwnf 193 t3_lx2s7w 186 t3_kyj460 167 Name: parent_id, dtype: int64
# Horizontal bar chart: the 25 submissions receiving the most top-level comments.
top25_parents = df.parent_id.value_counts().to_frame().head(25).reset_index()
fig = px.bar(top25_parents, x="parent_id", y="index", height=500,
             title='sumbissions with most comments (Top Level Comments)')
fig = fig.update_layout(xaxis_title='Number of comments',
                        yaxis_title='subbredit')
fig = fig.update_traces(marker_color='#5296dd')
fig.update_yaxes(autorange="reversed")
# Rows of the merged dataset that ARE one of the 5 most-commented submissions.
top5_ids = df_merged.parent_id.value_counts().head().index
df_top5 = df_merged[df_merged.child_id.isin(top5_ids)]
with pd.option_context('display.max_colwidth', None):
    display(df_top5)
| child_id | permalink | text | parent_id | subreddit | created_at | sentiment_blob | sentiment_nltk | score | top_level | ... | is_mod | is_gold | is_banned | comment_karma | link_karma | user_created_at | banned_unverified | creation_year | diff | days_after_creation | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 968 | t3_lx2s7w | /r/TrueOffMyChest/comments/lx2s7w/its_disgusting_that_people_are_less_angry_about/ | Its disgusting that people are less angry about what Amber Heard did to Johnny Depp and more angry about her hurting their “movement” | NaN | r/TrueOffMyChest | 2021-03-03 19:54:33 | Negative | Negative | 12447.0 | NaN | ... | False | False | False | 77234.0 | 82.0 | 2011-11-06 21:08:20 | others | others | 3404 days 22:46:13 | 3404.0 |
| 5691 | t3_kyj460 | /r/entertainment/comments/kyj460/amber_heard_is_a_monster_a_gold_digger_looking/ | Amber Heard Is A Monster, A Gold Digger Looking For Any Opportunity Says Raul Julia Jr. | NaN | r/entertainment | 2021-01-16 13:41:16 | Neutral | Positive | 11894.0 | NaN | ... | True | False | False | 588.0 | 90527.0 | 2019-08-19 14:11:13 | others | 2019 | 515 days 23:30:03 | 515.0 |
| 8906 | t3_lo748q | /r/entertainment/comments/lo748q/emilia_clarke_to_replace_amber_heard_as_mera_in/ | Emilia Clarke to Replace Amber Heard As Mera In Aquaman 2? - FandomWire | NaN | r/entertainment | 2021-02-20 13:27:20 | Neutral | Neutral | 17412.0 | NaN | ... | False | False | False | 51696.0 | 18214.0 | 2012-01-25 22:06:22 | others | others | 3313 days 15:20:58 | 3313.0 |
| 10577 | t3_lu5055 | /r/pussypassdenied/comments/lu5055/amber_heard_has_been_fired_from_jason_momoas/ | Amber Heard Has Been Fired From Jason Momoa's "Aquaman 2" | NaN | r/pussypassdenied | 2021-02-28 03:51:34 | Neutral | Negative | 29511.0 | NaN | ... | False | False | False | 7219.0 | 10995.0 | 2016-10-10 00:01:25 | others | others | 1602 days 03:50:09 | 1602.0 |
| 17009 | t3_n9lwnf | /r/iamatotalpieceofshit/comments/n9lwnf/amber_heard_still_in_aquaman_2_despite_proof_that/ | Amber heard still in aquaman 2 despite proof that she's an abuser | NaN | r/iamatotalpieceofshit | 2021-05-11 01:44:28 | Neutral | Negative | 9286.0 | NaN | ... | False | False | False | 908.0 | 9297.0 | 2017-04-20 22:30:30 | others | others | 1481 days 03:13:58 | 1481.0 |
5 rows × 24 columns
# Make the slug comparable to the human-readable titles: underscores -> spaces.
df_merged.submission_text = df_merged.submission_text.str.replace('_', ' ')
# get a list with the top 5 submission text
top5_text = list(df_top5.text)
def compare(snippet):
    """Return True when `snippet` occurs in ANY of the top-5 titles.

    Bug fix: the original ended with `else: return False` INSIDE the loop,
    so it returned after inspecting only top5_text[0] — every other title
    was never checked.  (It also shadowed the builtin `str`.)
    """
    return any(snippet in text for text in top5_text)
mask = df_merged.submission_text.apply(compare)
df_top5_contributions1 = df_merged[mask]
# Per-author contribution counts among the text-matched rows.
df_top5_authors = (df_top5_contributions1
                   .groupby(df_top5_contributions1.user_name)
                   .size()
                   .reset_index(name='n_contributions'))
fig = px.bar(df_top5_authors, x='user_name', y='n_contributions',
             title='The number comments per author on these submissions')
fig.update_traces(marker_color='#5296dd', opacity=1, textposition='auto')
fig.show()
df_top5_contributions1.shape
(45, 24)
This text-matching approach didn't work well — it matched far fewer contributions (45) than the submissions actually received in parent comments.
# Alternative approach: select the comments whose parent_id is one of the
# 5 most-commented submissions.
top5_parent_ids = df_merged.parent_id.value_counts().head().index
df_top5_contributions2 = df_merged[df_merged.parent_id.isin(top5_parent_ids)]
df_top5_authors = (df_top5_contributions2
                   .groupby(df_top5_contributions2.user_name)
                   .size()
                   .reset_index(name='n_contributions'))
fig = px.bar(df_top5_authors, x='user_name', y='n_contributions',
             title='The number of parent comments per author on these submissions')
fig.update_traces(marker_color='#5296dd', opacity=1, textposition='auto')
fig.update_yaxes(range=[0, 5])
fig.show()
NOTE: There are 50 parent comments from banned accounts
df_top5_contributions2.shape
(1097, 24)
Investigating the authors with the most submissions¶
# Keep only submissions (drop all comments).
df_submissions = df[df.submission_comment == 'submission']
print(df_submissions.shape)
df_submissions.head(2)
(1925, 17)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | t3_ko1duk | /r/CelebBattles/comments/ko1duk/marvel_vs_dc_t... | Marvel vs DC : Team Marvel (Evangeline Lilly B... | NaN | r/CelebBattles | -banned- | 2021-01-01 00:21:46 | Neutral | Positive | 184.0 | NaN | submission | 25 | marvel_vs_dc_team_marvel_evangeline_lilly_brie | 8 | [] | 0 |
| 1 | t3_ko1ew0 | /r/CelebbattlePolls/comments/ko1ew0/marvel_vs_... | Marvel vs DC : Team Marvel (Evangeline Lilly B... | NaN | r/CelebbattlePolls | CelebBattleVoteBot | 2021-01-01 00:23:29 | Neutral | Positive | 1.0 | NaN | submission | 25 | marvel_vs_dc_team_marvel_evangeline_lilly_brie | 8 | [] | 0 |
df_submissions.author.value_counts().nlargest(n=10)
-banned- 535 charliedba 98 LoveAmberHeard42286 97 Truthbetheprejudice 43 CelebBattleVoteBot 42 YahYah241 26 owenlinx 20 the-speed-of-pain 20 shalanarose 20 90police 17 Name: author, dtype: int64
df_submissions.author.value_counts().to_frame().head(10)
| author | |
|---|---|
| -banned- | 535 |
| charliedba | 98 |
| LoveAmberHeard42286 | 97 |
| Truthbetheprejudice | 43 |
| CelebBattleVoteBot | 42 |
| YahYah241 | 26 |
| owenlinx | 20 |
| the-speed-of-pain | 20 |
| shalanarose | 20 |
| 90police | 17 |
# Horizontal bar chart of the 10 most prolific submitters.
top10_authors = df_submissions.author.value_counts().to_frame().head(10).reset_index()
fig = px.bar(top10_authors, x="author", y="index", height=500,
             title='Authors with most Submissions')
fig = fig.update_traces(marker_color='#5296dd')
fig = fig.update_layout(xaxis_title='Number of Negative Submissions',
                        yaxis_title='Author_Name')
fig.update_yaxes(autorange="reversed")
Check whether the users with the most submissions are mods, have gold, or have a verified email¶
df_submissions.author.value_counts().head()
-banned- 535 charliedba 98 LoveAmberHeard42286 97 Truthbetheprejudice 43 CelebBattleVoteBot 42 Name: author, dtype: int64
# Top-25 submitters; the [1:] slice drops the leading '-banned-' placeholder.
check_list = df_submissions.author.value_counts().nlargest(n=25).index[1:].tolist()
check_list
# User records for these high-volume submitters.
df_check = df_users[df_users['user_name'].isin(check_list)]
print(df_check.shape)
df_check.head(2)
(24, 10)
| user_name | has_verified_email | is_mod | is_gold | is_banned | comment_karma | link_karma | user_created_at | banned_unverified | creation_year | |
|---|---|---|---|---|---|---|---|---|---|---|
| 21343 | MikiSayaka33 | True | True | False | False | 27550.0 | 49114.0 | 2016-01-26 02:48:25 | others | others |
| 26383 | SableFilms | True | True | False | False | 124.0 | 14007.0 | 2016-12-06 14:58:34 | others | others |
df_check['user_name'].nunique()
24
# Print the value distribution of every user attribute except the identifiers.
for col in df_check.columns:
    if col in ['user_name', 'user_created_at']:
        continue
    print('The value counts of the users with the most submissions: ' + col)
    print(df_check[col].value_counts())
    print('\n')
The value counts of the users with the most submissions: has_verified_email True 23 False 1 Name: has_verified_email, dtype: int64 The value counts of the users with the most submissions: is_mod True 16 False 8 Name: is_mod, dtype: int64 The value counts of the users with the most submissions: is_gold False 19 True 5 Name: is_gold, dtype: int64 The value counts of the users with the most submissions: is_banned False 20 True 4 Name: is_banned, dtype: int64 The value counts of the users with the most submissions: comment_karma 0.0 2 754.0 2 101.0 1 11983.0 1 2.0 1 27550.0 1 124.0 1 405.0 1 285.0 1 5.0 1 146.0 1 1436.0 1 40.0 1 95386.0 1 52.0 1 25.0 1 1063.0 1 1.0 1 Name: comment_karma, dtype: int64 The value counts of the users with the most submissions: link_karma 3388.0 1 20315.0 1 11539.0 1 74998.0 1 8374.0 1 25263.0 1 1641.0 1 2698.0 1 49114.0 1 749.0 1 865.0 1 32197.0 1 5153.0 1 2012.0 1 175.0 1 19887.0 1 16991.0 1 14007.0 1 1653.0 1 1.0 1 Name: link_karma, dtype: int64 The value counts of the users with the most submissions: banned_unverified others 19 banned 4 unverified 1 Name: banned_unverified, dtype: int64 The value counts of the users with the most submissions: creation_year 2020 7 2021 6 banned 4 2019 4 others 2 2018 1 Name: creation_year, dtype: int64
NOTE: About 50% of accounts with the most submissions created in 2020,2021.
df['urls'].nunique()
233
df[df.astype(str)['urls'] != '[]'].head(2)
| child_id | permalink | text | parent_id | subreddit | author | created_at | sentiment_blob | sentiment_nltk | score | top_level | submission_comment | text_words | submission_text | submission_words | urls | urls_count | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2 | t1_ghnp3w4 | /r/CelebbattlePolls/comments/ko1ew0/marvel_vs_... | Poll for [Marvel vs DC : Team Marvel (Evangeli... | t3_ko1ew0 | r/CelebbattlePolls | CelebBattleVoteBot | 2021-01-01 00:23:30 | Neutral | Positive | 1.0 | submission | comment | 29 | marvel_vs_dc_team_marvel_evangeline_lilly_brie | 8 | ['https://reddit.com'] | 1 |
| 3 | t1_ghnp3wq | /r/CelebBattles/comments/ko1duk/marvel_vs_dc_t... | Vote here: https://www.reddit.com/poll/ko1ew0\... | t3_ko1duk | r/CelebBattles | CelebBattleVoteBot | 2021-01-01 00:23:30 | Positive | Neutral | 1.0 | submission | comment | 12 | marvel_vs_dc_team_marvel_evangeline_lilly_brie | 8 | ['https://www.reddit.com'] | 1 |
df['urls'].astype('str').value_counts().head()
[] 17522 ['https://jerkofftocelebs.com', 'https://jerkofftocelebs.com', 'https://reddit.com', 'https://jerkofftocelebs.com', 'https://discord.gg', 'https://jerkofftocelebs.com'] 161 ['https://www.reddit.com'] 49 ['https://reddit.com'] 43 ['https://redd.it', 'https://www.instagram.com'] 42 Name: urls, dtype: int64
# Distribution of the number of URLs per contribution (semicolon hides echo).
df['urls_count'].value_counts();
fig = px.histogram(df['urls_count'].to_frame(), x="urls_count",
                   title='Count of the number of URLS in each Contribution',
                   nbins=130)
fig = fig.update_traces(marker_color='#5296dd')
fig.update_layout(xaxis=dict(tickmode='array', tickvals=df['urls_count']))
fig.show()
# Same histogram with the dominant bins removed, to expose the tail.
px.histogram(df[~df['urls_count'].isin([0, 60, 20, 16])], x="urls_count",
             title='Count of the number of URLS in each Contribution',
             nbins=20).update_traces(marker_color='#5296dd')
Check the number of submission text words
Of course few words are easier for bots to create
# Histogram of word counts in submission titles.
fig = px.histogram(df['submission_words'].to_frame(), x="submission_words",
                   title='number of words in submission text', nbins=50)
fig = fig.update_traces(marker_color='#5296dd')
fig.update_layout(xaxis=dict(title='Number of submission words',
                             tickmode='linear'))
Most used Subreddits¶
df['subreddit'].nunique()
545
# NOTE(review): removed `df['subreddit'] = df['subreddit'].str[:]` — taking a
# full slice of every string and assigning it back is a no-op.
df.subreddit.value_counts().to_frame().head(20).reset_index()
| index | subreddit | |
|---|---|---|
| 0 | r/entertainment | 2697 |
| 1 | r/pussypassdenied | 1788 |
| 2 | r/MensRights | 1266 |
| 3 | r/JerkOffToCelebs | 1249 |
| 4 | r/iamatotalpieceofshit | 905 |
| 5 | r/TrueOffMyChest | 748 |
| 6 | r/DC_Cinematic | 707 |
| 7 | r/AskReddit | 602 |
| 8 | r/CelebBattles | 571 |
| 9 | r/movies | 484 |
| 10 | r/EntitledBitch | 382 |
| 11 | r/PurplePillDebate | 358 |
| 12 | r/redditmoment | 280 |
| 13 | r/awfuleverything | 269 |
| 14 | r/JusticeForJohnnyDepp | 219 |
| 15 | r/PrequelMemes | 202 |
| 16 | r/gameofthrones | 184 |
| 17 | r/SubredditDrama | 181 |
| 18 | r/celebnsfw | 169 |
| 19 | r/pickoneceleb | 148 |
# Horizontal bar chart of the 20 most active subreddits.
top20_subs = df.subreddit.value_counts().to_frame().head(20).reset_index()
fig = px.bar(top20_subs, x="subreddit", y="index", height=500,
             title='Most used subbredits')
fig = fig.update_traces(marker_color='#5296dd')
fig = fig.update_layout(xaxis_title='Number of comments',
                        yaxis_title='subbredit')
fig.update_yaxes(autorange="reversed")
Merged Users Data with Comments & Submissions Data¶
Difference in time between creating the account and posting¶
# note that value_counts() neglect Zeros
# Distribution of the gap (in days) between account creation and contribution.
df_merged["days_after_creation"].value_counts()
3449.0 60
3343.0 59
3393.0 57
3405.0 55
3394.0 54
..
2904.0 1
3993.0 1
1790.0 1
1789.0 1
2150.0 1
Name: days_after_creation, Length: 3290, dtype: int64
px.histogram(df_merged, x="days_after_creation",title='days_after_creation',
nbins=250).update_traces(marker_color='#5296dd',).update_layout(
xaxis_title='number of days',)
print('The number of accounts posted the same day they was created!')
# NOTE(review): this counts contribution ROWS, not distinct accounts —
# consider ['user_name'].nunique() if unique accounts are wanted.
df_merged[df_merged['days_after_creation'] == 0].shape[0]
The number of accounts posted the same day they was created!
45
print('The number of accounts posted the same week they was created!')
# NOTE(review): counts contribution rows, not distinct accounts.
df_merged[df_merged['days_after_creation'] <= 7].shape[0]
The number of accounts posted the same week they was created!
150
print('The number of accounts posted the same month they was created!')
# NOTE(review): counts contribution rows, not distinct accounts.
df_merged[df_merged['days_after_creation'] <= 30].shape[0]
The number of accounts posted the same month they was created!
451
# Creation-year breakdown of the contributions made within 30 days of signup.
df_merged[df_merged['days_after_creation'] <= 30]['user_created_at'].dt.year.value_counts()
2021 385 2020 66 Name: user_created_at, dtype: int64
We can see that 385 contributions came from accounts that were created and posted within the same month in 2021.
# Restrict to accounts created in 2021 that posted within their first month;
# break their contributions down by creation month.
mask = (df_merged['days_after_creation'] <= 30) & (df_merged['user_created_at'].dt.year == 2021)
df_merged[mask]['user_created_at'].dt.strftime('%b').value_counts()
Jan 105 Feb 82 Apr 82 Mar 80 May 36 Name: user_created_at, dtype: int64
# Creation-month counts for first-month posters, re-indexed into calendar order.
first_month = df_merged['days_after_creation'] <= 30
months = df_merged.loc[first_month, 'user_created_at'].dt.strftime('%b')
months_sorted = months.value_counts()[['Jan', 'Feb', 'Mar', 'Apr', 'May']]
months_sorted
Jan 105 Feb 82 Mar 80 Apr 82 May 36 Name: user_created_at, dtype: int64
# Bar chart of those first-month contributions by creation month.
fig = px.bar(months_sorted, x=months_sorted.index, y=months_sorted.values,
             text=months_sorted.values)
fig.update_layout(
    title={
        'text': "contributions of the accounts posted/commented <br> the same month they were created",
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    })
# Months above 100 contributions are highlighted in red.
clrs = ['red' if count > 100 else '#5296dd' for count in months_sorted.values]
fig.update_traces(marker_color=clrs, marker_line_width=1.5, opacity=1,
                  textposition='auto')
fig.show()
# months_sorted.plot(kind='bar', figsize=(8,8), title='contributions of the accounts posted/commented \n the same month they were created');
# THE SAME MONTH:
# check for the date these accounts posted/commented
reddit_30 = df_merged[df_merged['days_after_creation'] <= 30]
# Fix: group the filtered frame directly.  The original grouped df_merged by a
# SHORTER Series (reddit_30[...]), which only worked through silent pandas
# index alignment (non-matching rows got a NaN key and were dropped).  The
# result is identical, but the intent is now explicit.
dates_count = reddit_30.groupby(reddit_30['created_at'].dt.date).size().reset_index(name='contributions')
dates_count.sort_values('contributions', ascending=False);
fig = px.bar(dates_count,
             x='created_at',
             y='contributions', title = 'contributions of the accounts posted/commented the same month they were created')
fig.update_traces(marker_color='#5296dd',
                  marker_line_width=1.5, opacity=1, textposition='auto').update_layout()
fig.show()
# THE SAME WEEK
# check for the date these accounts posted/commented
reddit_7 = df_merged[df_merged['days_after_creation'] <= 7]
# Fix: group the filtered frame directly instead of grouping df_merged by a
# shorter Series and relying on index alignment (same result, clearer intent).
dates_count_7 = reddit_7.groupby(reddit_7['created_at'].dt.date).size().reset_index(name='contributions')
dates_count_7.sort_values('contributions', ascending=False);
fig = px.bar(dates_count_7,
             x='created_at',
             y='contributions', title = 'contributions of the accounts posted/commented the same week they were created')
fig.update_traces(marker_color='#5296dd',
                  marker_line_width=1.5, opacity=1, textposition='auto').update_layout()
fig.show()
# THE SAME DAY
# check for the date these accounts posted/commented
reddit_1 = df_merged[df_merged['days_after_creation'] <= 0]
# Fix: group the filtered frame directly (the original grouped df_merged by a
# shorter Series, depending on index alignment to drop the other rows).
dates_count_1 = reddit_1.groupby(reddit_1['created_at'].dt.date).size().reset_index(name='contributions')
dates_count_1.sort_values('contributions', ascending=False);
fig = px.bar(dates_count_1,
             x='created_at',
             y='contributions', title = 'contributions of the accounts posted/commented the same day they were created')
fig.update_traces(marker_color='#5296dd',
                  marker_line_width=1.5, opacity=1, textposition='auto').update_layout()
fig.show()
# get the author names that commented in a negative way the same month the account was created
# to add to the suspected list
# NOTE: pandas query supports Python-style chained comparisons, so this keeps
# rows where BOTH sentiment columns equal 'Negative'.
df_merged_30 = df_merged.query("days_after_creation <= 30 & sentiment_blob == sentiment_nltk == 'Negative' ")
df_merged_30.head()
| child_id | permalink | text | parent_id | subreddit | created_at | sentiment_blob | sentiment_nltk | score | top_level | ... | is_mod | is_gold | is_banned | comment_karma | link_karma | user_created_at | banned_unverified | creation_year | diff | days_after_creation | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2784 | t1_ghra7an | /r/Makeup/comments/kogfek/amber_heard_the_face... | Um... wtf. A UK court found that Johnny did ph... | t3_kogfek | r/Makeup | 2021-01-01 20:39:19 | Negative | Negative | 113.0 | submission | ... | False | False | False | 13294.0 | 3484.0 | 2020-12-25 03:08:50 | unverified | 2020 | 7 days 17:30:29 | 7.0 |
| 2785 | t1_ghrcksp | /r/Makeup/comments/kogfek/amber_heard_the_face... | I figure it’s just Pirates of the Caribbean st... | t1_ghrbajl | r/Makeup | 2021-01-01 21:00:15 | Negative | Negative | 48.0 | comment | ... | False | False | False | 13294.0 | 3484.0 | 2020-12-25 03:08:50 | unverified | 2020 | 7 days 17:51:25 | 7.0 |
| 2904 | t1_ghse9qw | /r/Vent/comments/jpgsyv/i_fucking_hate_amber_h... | JOHNNY DEPP IS A SCUMBAG DRUG ADDICT PIECE OF ... | t3_jpgsyv | r/Vent | 2021-01-02 02:43:56 | Negative | Negative | 1.0 | submission | ... | False | False | False | 188.0 | 20.0 | 2020-12-28 04:17:41 | others | 2020 | 4 days 22:26:15 | 4.0 |
| 2905 | t1_ghsebqn | /r/Vent/comments/jpgsyv/i_fucking_hate_amber_h... | GO FUCK YOURSELF | t1_gbf65ke | r/Vent | 2021-01-02 02:44:27 | Negative | Negative | 0.0 | comment | ... | False | False | False | 188.0 | 20.0 | 2020-12-28 04:17:41 | others | 2020 | 4 days 22:26:46 | 4.0 |
| 2906 | t1_ghsen7o | /r/Vent/comments/jpgsyv/i_fucking_hate_amber_h... | FUCK YOU AND FUCK JOHNNY DEPP | t1_gbhcttr | r/Vent | 2021-01-02 02:47:19 | Negative | Negative | -1.0 | comment | ... | False | False | False | 188.0 | 20.0 | 2020-12-28 04:17:41 | others | 2020 | 4 days 22:29:38 | 4.0 |
5 rows × 24 columns
# suspected5 = (df_merged_30.user_name).tolist()
# set(suspected5) & set(suspected_list)
# suspected_list = suspected_list + suspected5
# with pd.option_context('display.max_colwidth', None):
# display(df_merged.query("user_name == 'LoveAmberHeard42286' ").text.head())
It looks like this account is not suspicious, so we are going to remove it from the suspected_list.
# suspected_list.remove('LoveAmberHeard42286')
# len(suspected_list)
Estimation of Number of User Accounts Created in each year / having contributions in 2021¶
# Contributions grouped by the year the contributing account was created.
creation_year = df_merged['user_created_at'].dt.year
df_contributions = df_merged.groupby(creation_year).size().reset_index(name='n_accounts')
fig = px.bar(df_contributions, x='user_created_at', y='n_accounts',
             text='n_accounts',
             title='Number of User Accounts Created in each year / having contributions in 2021')
fig.update_traces(marker_color='#5296dd', marker_line_width=1.5, opacity=1,
                  textposition='auto')
fig.show()
Contributions count over Months in 2021¶
# base_color = sb.color_palette()[0]
# clrs = ['red' if (y > 4000) else base_color for y in df.groupby(df.created_at.dt.month).size()]
# fig, ax = plt.subplots()
# fig.set_size_inches(11.7, 8.27)
# sb.countplot(x = df['created_at'].dt.month, data = df, palette=clrs)
# plt.xlabel('Months', fontsize=14)
# plt.ylabel('Contributions Count', fontsize=14)
# plt.title('Contributions count over Months in 2021', fontsize=18)
# plt.show()
# Contributions per calendar month of 2021.
monthly = df.groupby(df['created_at'].dt.month).size()
fig = px.bar(monthly.reset_index(name='contribution_count'),
             x='created_at', y='contribution_count', text='contribution_count')
fig.update_layout(
    title={
        'text': "Estimation of the number contributions created in each month of 2021",
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    })
fig.update_layout(
    xaxis=dict(
        title='Month(2021)',
        tickmode='array',
        tickvals=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
        ticktext=['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    )
)
# Months above 4000 contributions are highlighted in red.
clrs = ['red' if total > 4000 else '#5296dd' for total in monthly]
fig.update_traces(marker_color=clrs, marker_line_width=1.5, opacity=1,
                  textposition='auto')
fig.show()
Contributions count over Days of month in 2021¶
# base_color = sb.color_palette()[0]
# clrs = ['red' if (y > 1500) else base_color for y in df.groupby(df.created_at.dt.day).size()]
# fig, ax = plt.subplots()
# fig.set_size_inches(11.7, 8.27)
# sb.countplot(x = df['created_at'].dt.day, data = df, palette=clrs)
# plt.xlabel('Months', fontsize=14)
# plt.ylabel('Contributions Count', fontsize=14)
# plt.title('Contributions count over Days of month in 2021', fontsize=18)
# plt.show()
# Contributions per day-of-month across 2021.
daily = df.groupby(df['created_at'].dt.day).size()
fig = px.bar(daily.reset_index(name='contribution_count'),
             x='created_at', y='contribution_count', text='contribution_count')
fig.update_layout(
    title={
        'text': "Estimation of the number contributions created in each DayOfMonth in 2021",
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    })
fig.update_layout(xaxis=dict(title='Month Days(2021)', tickmode='linear'))
# Days above 1500 contributions are highlighted in red.
clrs = ['red' if total > 1500 else '#5296dd' for total in daily]
fig.update_traces(marker_color=clrs, marker_line_width=1.5, opacity=1,
                  textposition='auto')
fig.show()
In Which DayOfWeek users created more?¶
# Abbreviated weekday name ('Mon'..'Sun') of every contribution.
week_day = df['created_at'].dt.strftime('%a')
# one can sort by any order by providing a custom index explicitely :
# https://stackoverflow.com/questions/43855474/changing-sort-in-value-counts/43855492
week_sorted = week_day.value_counts()[['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']]
week_sorted
Mon 2172 Tue 2640 Wed 1824 Thu 2160 Fri 1779 Sat 4763 Sun 2967 Name: created_at, dtype: int64
# Contributions per day of week (dayofweek: 0=Mon .. 6=Sun, see ticktext).
by_weekday = df.groupby(df['created_at'].dt.dayofweek).size()
fig = px.bar(by_weekday.reset_index(name='contribution_count'),
             x='created_at', y='contribution_count', text='contribution_count')
fig.update_layout(
    title={
        'text': "Estimation of the number contributions created in each DayOfWeek (2021)",
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    })
fig.update_layout(
    xaxis=dict(
        title='DayOfWeek(2021)',
        tickmode='array',
        tickvals=[0, 1, 2, 3, 4, 5, 6],
        ticktext=['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    )
)
# Weekdays above 4000 contributions are highlighted in red.
clrs = ['red' if total > 4000 else '#5296dd' for total in by_weekday]
fig.update_traces(marker_color=clrs, marker_line_width=1.5, opacity=1,
                  textposition='auto')
fig.show()
Check the hours at which contributions were made (2021)¶
# Distribution of contributions across the 24 hours of the day (all of 2021).
df_hours = df.groupby(df['created_at'].dt.hour).size().reset_index(name='contribution_count')
fig = px.bar(df_hours, x='created_at', y='contribution_count',
             title='Number of contrbutions Comment/Submission in day hours (2021)')
fig.update_layout(xaxis=dict(title='Hours of Day', tickmode='linear', dtick=1))
fig.update_traces(marker_color='#5296dd', marker_line_width=1.5,
                  opacity=1, textposition='auto')
fig.show()
It's suspicious to see contribution volume stay high throughout the entire day — organic activity usually dips overnight.
Which dates have the highest contributions from users?¶
df.created_at.dt.date.value_counts().head()
2021-04-17 1341 2021-02-20 931 2021-02-28 855 2021-05-11 612 2021-03-04 606 Name: created_at, dtype: int64
# Contributions per calendar date. groupby(...).size() does in one step what
# the superseded (and now removed) agg('count')/rename approach did in four.
trendy_dates = df.groupby(df['created_at'].dt.date).size().reset_index(name='contribution_count')
fig = px.bar(trendy_dates,
             x='created_at', y='contribution_count')
fig.update_layout(
    title={
        'text': "The number of contributions created in each date",
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top'
    })
fig.update_traces(marker_color='#5296dd',
                  marker_line_width=1.5, opacity=1, textposition='auto')
fig.show()
# Show the same table ordered busiest-first.
trendy_dates.sort_values('contribution_count', ascending=False)
| created_at | contribution_count | |
|---|---|---|
| 106 | 2021-04-17 | 1341 |
| 50 | 2021-02-20 | 931 |
| 58 | 2021-02-28 | 855 |
| 130 | 2021-05-11 | 612 |
| 62 | 2021-03-04 | 606 |
| ... | ... | ... |
| 80 | 2021-03-22 | 3 |
| 99 | 2021-04-10 | 3 |
| 101 | 2021-04-12 | 2 |
| 79 | 2021-03-21 | 2 |
| 102 | 2021-04-13 | 2 |
151 rows × 2 columns
# Select the 5 busiest dates, then order those rows chronologically for plotting.
# BUG FIX: the previous version called head(5) on the date-ordered frame,
# which returned the first five CALENDAR dates (Jan 1-5) rather than the five
# dates with the highest contribution counts. nlargest picks by count first;
# it also returns a fresh DataFrame, avoiding the SettingWithCopy warning
# that sort_values(inplace=True) on a head() slice used to trigger.
top_trendy_dates = trendy_dates.nlargest(5, 'contribution_count').sort_values('created_at')
top_trendy_dates
| created_at | contribution_count | |
|---|---|---|
| 0 | 2021-01-01 | 470 |
| 1 | 2021-01-02 | 197 |
| 2 | 2021-01-03 | 163 |
| 3 | 2021-01-04 | 167 |
| 4 | 2021-01-05 | 320 |
# Plot the selected peak dates; bars above 400 contributions are shown in red.
top_trendy_dates.reset_index(inplace=True)
fig = px.bar(top_trendy_dates, x='created_at', y='contribution_count',
             title='Number of contrbutions Comment/Submission in trendy dates')
fig.update_layout(
    xaxis=dict(
        title='Contribution Date',
        tickmode='array',
        tickvals=top_trendy_dates.created_at,
    )
)
highlight = ['red' if count > 400 else '#5296dd'
             for count in top_trendy_dates.contribution_count]
fig.update_traces(marker_color=highlight, opacity=1, textposition='auto')
fig.show()
# Number of distinct accounts that contributed on the busiest day (Apr 17, 2021).
df_merged.loc[df_merged.created_at.dt.strftime('%Y-%m-%d') == '2021-04-17', 'user_name'].nunique()
699
# Accounts active on the Apr 17 peak, bucketed by account-creation year.
df_merged_peak1 = df_merged[df_merged.created_at.dt.strftime('%Y-%m-%d') == '2021-04-17']
df_user_year1 = (df_merged_peak1
                 .groupby(df_merged_peak1['user_created_at'].dt.year)
                 .size()
                 .reset_index(name='contribution_count'))
fig = px.bar(df_user_year1, x='user_created_at', y='contribution_count',
             title='The creation year of the accounts contributed on the peak day (Apr 17,2021)')
fig.update_layout(xaxis=dict(title='Accout Creation Year', tickmode='linear', dtick=1))
# Creation years accounting for more than 250 contributions stand out in red.
bar_colors = ['red' if n > 250 else '#5296dd' for n in df_user_year1.contribution_count]
fig.update_traces(marker_color=bar_colors, marker_line_width=1.5,
                  opacity=1, textposition='auto')
fig.show()
# Hourly contribution counts on the Apr 17 peak day; hours above 80 in red.
df_peak_1 = df[df.created_at.dt.strftime('%Y-%m-%d') == '2021-04-17']
# FIX: group df_peak_1 by its OWN hour column. The previous code grouped the
# filtered frame by df['created_at'].dt.hour (the full frame's Series), which
# only worked through implicit index alignment and would silently break if
# either frame were reindexed.
df_hours = df_peak_1.groupby(df_peak_1['created_at'].dt.hour).size().reset_index(name='contribution_count')
fig = px.bar(df_hours,
             x='created_at', y='contribution_count',
             title='Number of contrbutions Comment/Submission in day hours')
fig.update_layout(
    xaxis=dict(
        title='Hours of Day',
        tickmode='linear',
        dtick=1
    )
)
clrs = ['red' if (y > 80) else '#5296dd' for y in df_hours.contribution_count]
fig.update_traces(marker_color=clrs,
                  marker_line_width=1.5, opacity=1, textposition='auto')
fig.show()
# Number of distinct accounts that contributed on the second peak day (Feb 20, 2021).
df_merged.loc[df_merged.created_at.dt.strftime('%Y-%m-%d') == '2021-02-20', 'user_name'].nunique()
626
# Accounts active on the Feb 20 peak, bucketed by account-creation year.
df_merged_peak2 = df_merged[df_merged.created_at.dt.strftime('%Y-%m-%d') == '2021-02-20']
df_user_year2 = (df_merged_peak2
                 .groupby(df_merged_peak2['user_created_at'].dt.year)
                 .size()
                 .reset_index(name='contribution_count'))
fig = px.bar(df_user_year2, x='user_created_at', y='contribution_count',
             title='The creation year of the accounts contributed on the peak day (Feb 20,2021)')
fig.update_layout(xaxis=dict(title='Accout Creation Year', tickmode='linear', dtick=1))
# Creation years accounting for more than 100 contributions stand out in red.
bar_colors = ['red' if n > 100 else '#5296dd' for n in df_user_year2.contribution_count]
fig.update_traces(marker_color=bar_colors, marker_line_width=1.5,
                  opacity=1, textposition='auto')
fig.show()
# Hourly contribution counts on the Feb 20 peak day; hours above 200 in red.
df_peak_2 = df[df.created_at.dt.strftime('%Y-%m-%d') == '2021-02-20']
# FIX: group df_peak_2 by its OWN hour column. The previous code grouped the
# filtered frame by the full frame's hour Series, relying on implicit index
# alignment — fragile if either frame is reindexed.
df_hours = df_peak_2.groupby(df_peak_2['created_at'].dt.hour).size().reset_index(name='contribution_count')
fig = px.bar(df_hours,
             x='created_at', y='contribution_count',
             title='Number of contrbutions Comment/Submission in day hours')
fig.update_layout(
    xaxis=dict(
        title='Hours of Day',
        tickmode='linear',
        dtick=1
    )
)
clrs = ['red' if (y > 200) else '#5296dd' for y in df_hours['contribution_count']]
fig.update_traces(marker_color=clrs,
                  marker_line_width=1.5, opacity=1, textposition='auto')
fig.show()
# Number of distinct accounts that contributed on the third peak day (Feb 28, 2021).
df_merged.loc[df_merged.created_at.dt.strftime('%Y-%m-%d') == '2021-02-28', 'user_name'].nunique()
617
# Accounts active on the Feb 28 peak, bucketed by account-creation year.
df_merged_peak3 = df_merged[df_merged.created_at.dt.strftime('%Y-%m-%d') == '2021-02-28']
df_user_year3 = (df_merged_peak3
                 .groupby(df_merged_peak3['user_created_at'].dt.year)
                 .size()
                 .reset_index(name='contribution_count'))
fig = px.bar(df_user_year3, x='user_created_at', y='contribution_count',
             title='The creation year of the accounts contributed on the peak day (Feb 28,2021)')
fig.update_layout(xaxis=dict(title='Accout Creation Year', tickmode='linear', dtick=1))
# Creation years accounting for more than 140 contributions stand out in red.
bar_colors = ['red' if n > 140 else '#5296dd' for n in df_user_year3['contribution_count']]
fig.update_traces(marker_color=bar_colors, marker_line_width=1.5,
                  opacity=1, textposition='auto')
fig.show()
# Hourly contribution counts on the Feb 28 peak day.
df_peak_3 = df[df.created_at.dt.strftime('%Y-%m-%d') == '2021-02-28']
# BUG FIX: the original grouped the FULL frame (df) by hour here, so this
# chart silently showed all of 2021 instead of the Feb 28 peak day. Group the
# filtered df_peak_3 by its own hour column, matching the handling of the
# other two peak days. (A superseded commented-out matplotlib version of the
# account-year chart was removed as dead code.)
df_hours = df_peak_3.groupby(df_peak_3['created_at'].dt.hour).size().reset_index(name='contribution_count')
fig = px.bar(df_hours,
             x='created_at', y='contribution_count',
             title='Number of contrbutions Comment/Submission in day hours')
fig.update_layout(
    xaxis=dict(
        title='Hours of Day',
        tickmode='linear',
        dtick=1
    )
)
fig.update_traces(marker_color='#5296dd',
                  marker_line_width=1.5, opacity=1, textposition='auto')
fig.show()
# df_merged.hist(figsize=(21,21), color='#5296dd', xrot = 90);
# # plt.xticks(rotation=90);